Home | History | Annotate | Line # | Download | only in lfs
lfs_alloc.c revision 1.119.6.4
      1 /*	$NetBSD: lfs_alloc.c,v 1.119.6.4 2016/10/05 20:56:12 skrll Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 /*
     32  * Copyright (c) 1991, 1993
     33  *	The Regents of the University of California.  All rights reserved.
     34  *
     35  * Redistribution and use in source and binary forms, with or without
     36  * modification, are permitted provided that the following conditions
     37  * are met:
     38  * 1. Redistributions of source code must retain the above copyright
     39  *    notice, this list of conditions and the following disclaimer.
     40  * 2. Redistributions in binary form must reproduce the above copyright
     41  *    notice, this list of conditions and the following disclaimer in the
     42  *    documentation and/or other materials provided with the distribution.
     43  * 3. Neither the name of the University nor the names of its contributors
     44  *    may be used to endorse or promote products derived from this software
     45  *    without specific prior written permission.
     46  *
     47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     57  * SUCH DAMAGE.
     58  *
     59  *	@(#)lfs_alloc.c	8.4 (Berkeley) 1/4/94
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.119.6.4 2016/10/05 20:56:12 skrll Exp $");
     64 
     65 #if defined(_KERNEL_OPT)
     66 #include "opt_quota.h"
     67 #endif
     68 
     69 #include <sys/param.h>
     70 #include <sys/systm.h>
     71 #include <sys/kernel.h>
     72 #include <sys/buf.h>
     73 #include <sys/lock.h>
     74 #include <sys/vnode.h>
     75 #include <sys/syslog.h>
     76 #include <sys/mount.h>
     77 #include <sys/malloc.h>
     78 #include <sys/pool.h>
     79 #include <sys/proc.h>
     80 #include <sys/kauth.h>
     81 
     82 #include <ufs/lfs/ulfs_quotacommon.h>
     83 #include <ufs/lfs/ulfs_inode.h>
     84 #include <ufs/lfs/ulfsmount.h>
     85 #include <ufs/lfs/ulfs_extern.h>
     86 
     87 #include <ufs/lfs/lfs.h>
     88 #include <ufs/lfs/lfs_accessors.h>
     89 #include <ufs/lfs/lfs_extern.h>
     90 #include <ufs/lfs/lfs_kernel.h>
     91 
/*
 * Constants and helpers for the in-memory inode free bitmap.
 *
 * Each word of (F)->lfs_ino_bitmap covers 32 inode numbers: inode I is
 * tracked in word (I >> BMSHIFT), bit (I & BMMASK).  A set bit means the
 * inode is free.
 *
 * The single-bit masks are built from an unsigned constant (1U) so that
 * selecting bit 31 never left-shifts into the sign bit of an int, which
 * would be undefined behavior.
 */
#define BMSHIFT 5	/* 2 ** 5 = 32 */
#define BMMASK  ((1 << BMSHIFT) - 1)

/* Mark inode I free in the bitmap of file system F. */
#define SET_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), 	\
	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK));	\
} while (0)

/* Mark inode I allocated (not free) in the bitmap of file system F. */
#define CLR_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), 	\
	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK));	\
} while (0)

/* Nonzero if inode I is marked free in the bitmap of file system F. */
#define ISSET_BITMAP_FREE(F, I) \
	((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
    108 
    109 /*
    110  * Add a new block to the Ifile, to accommodate future file creations.
    111  * Called with the segment lock held.
    112  */
    113 int
    114 lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
    115 {
    116 	struct vnode *vp;
    117 	struct inode *ip;
    118 	IFILE64 *ifp64;
    119 	IFILE32 *ifp32;
    120 	IFILE_V1 *ifp_v1;
    121 	struct buf *bp, *cbp;
    122 	int error;
    123 	daddr_t i, blkno, xmax;
    124 	ino_t oldlast, maxino;
    125 	CLEANERINFO *cip;
    126 
    127 	ASSERT_SEGLOCK(fs);
    128 
    129 	/* XXX should check or assert that we aren't readonly. */
    130 
    131 	/*
    132 	 * Get a block and extend the ifile inode. Leave the buffer for
    133 	 * the block in bp.
    134 	 */
    135 
    136 	vp = fs->lfs_ivnode;
    137 	ip = VTOI(vp);
    138 	blkno = lfs_lblkno(fs, ip->i_size);
    139 	if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
    140 				&bp)) != 0) {
    141 		return (error);
    142 	}
    143 	ip->i_size += lfs_sb_getbsize(fs);
    144 	lfs_dino_setsize(fs, ip->i_din, ip->i_size);
    145 	uvm_vnp_setsize(vp, ip->i_size);
    146 
    147 	/*
    148 	 * Compute the new number of inodes, and reallocate the in-memory
    149 	 * inode freemap.
    150 	 */
    151 
    152 	maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
    153 		  lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
    154 	fs->lfs_ino_bitmap = (lfs_bm_t *)
    155 		realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
    156 			sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
    157 	KASSERT(fs->lfs_ino_bitmap != NULL);
    158 
    159 	/* first new inode number */
    160 	i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
    161 		lfs_sb_getifpb(fs);
    162 
    163 	/*
    164 	 * We insert the new inodes at the head of the free list.
    165 	 * Under normal circumstances, the free list is empty here,
    166 	 * so we are also incidentally placing them at the end (which
    167 	 * we must do if we are to keep them in order).
    168 	 */
    169 	LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
    170 	LFS_PUT_HEADFREE(fs, cip, cbp, i);
    171 #ifdef DIAGNOSTIC
    172 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM)
    173 		panic("inode 0 allocated [2]");
    174 #endif /* DIAGNOSTIC */
    175 
    176 	/* inode number to stop at (XXX: why *x*max?) */
    177 	xmax = i + lfs_sb_getifpb(fs);
    178 
    179 	/*
    180 	 * Initialize the ifile block.
    181 	 *
    182 	 * XXX: these loops should be restructured to use the accessor
    183 	 * functions instead of using cutpaste polymorphism.
    184 	 */
    185 
    186 	if (fs->lfs_is64) {
    187 		for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
    188 			SET_BITMAP_FREE(fs, i);
    189 			ifp64->if_version = 1;
    190 			ifp64->if_daddr = LFS_UNUSED_DADDR;
    191 			ifp64->if_nextfree = ++i;
    192 		}
    193 		ifp64--;
    194 		ifp64->if_nextfree = oldlast;
    195 	} else if (lfs_sb_getversion(fs) > 1) {
    196 		for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
    197 			SET_BITMAP_FREE(fs, i);
    198 			ifp32->if_version = 1;
    199 			ifp32->if_daddr = LFS_UNUSED_DADDR;
    200 			ifp32->if_nextfree = ++i;
    201 		}
    202 		ifp32--;
    203 		ifp32->if_nextfree = oldlast;
    204 	} else {
    205 		for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
    206 			SET_BITMAP_FREE(fs, i);
    207 			ifp_v1->if_version = 1;
    208 			ifp_v1->if_daddr = LFS_UNUSED_DADDR;
    209 			ifp_v1->if_nextfree = ++i;
    210 		}
    211 		ifp_v1--;
    212 		ifp_v1->if_nextfree = oldlast;
    213 	}
    214 	LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1);
    215 
    216 	/*
    217 	 * Write out the new block.
    218 	 */
    219 
    220 	(void) LFS_BWRITE_LOG(bp); /* Ifile */
    221 
    222 	return 0;
    223 }
    224 
    225 /*
    226  * Allocate an inode for a new file.
    227  *
    228  * Takes the segment lock. Also (while holding it) takes lfs_lock
    229  * to frob fs->lfs_fmod.
    230  *
    231  * XXX: the mode argument is unused; should just get rid of it.
    232  */
    233 /* ARGSUSED */
    234 /* VOP_BWRITE 2i times */
    235 int
    236 lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
    237     ino_t *ino, int *gen)
    238 {
    239 	struct lfs *fs;
    240 	struct buf *bp, *cbp;
    241 	IFILE *ifp;
    242 	int error;
    243 	CLEANERINFO *cip;
    244 
    245 	fs = VTOI(pvp)->i_lfs;
    246 	if (fs->lfs_ronly)
    247 		return EROFS;
    248 
    249 	ASSERT_NO_SEGLOCK(fs);
    250 
    251 	lfs_seglock(fs, SEGM_PROT);
    252 
    253 	/* Get the head of the freelist. */
    254 	LFS_GET_HEADFREE(fs, cip, cbp, ino);
    255 
    256 	/* paranoia */
    257 	KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
    258 	DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
    259 	     *ino));
    260 
    261 	/* Update the in-memory inode freemap */
    262 	CLR_BITMAP_FREE(fs, *ino);
    263 
    264 	/*
    265 	 * Fetch the ifile entry and make sure the inode is really
    266 	 * free.
    267 	 */
    268 	LFS_IENTRY(ifp, fs, *ino, bp);
    269 	if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
    270 		panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
    271 		    *ino);
    272 
    273 	/* Update the inode freelist head in the superblock. */
    274 	LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
    275 	DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
    276 	     *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
    277 
    278 	/*
    279 	 * Retrieve the version number from the ifile entry. It was
    280 	 * bumped by vfree, so don't bump it again.
    281 	 */
    282 	*gen = lfs_if_getversion(fs, ifp);
    283 
    284 	/* Done with ifile entry */
    285 	brelse(bp, 0);
    286 
    287 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
    288 		/*
    289 		 * No more inodes; extend the ifile so that the next
    290 		 * lfs_valloc will succeed.
    291 		 */
    292 		if ((error = lfs_extend_ifile(fs, cred)) != 0) {
    293 			/* restore the freelist */
    294 			LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
    295 
    296 			/* unlock and return */
    297 			lfs_segunlock(fs);
    298 			return error;
    299 		}
    300 	}
    301 #ifdef DIAGNOSTIC
    302 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM)
    303 		panic("inode 0 allocated [3]");
    304 #endif /* DIAGNOSTIC */
    305 
    306 	/* Set superblock modified bit */
    307 	mutex_enter(&lfs_lock);
    308 	fs->lfs_fmod = 1;
    309 	mutex_exit(&lfs_lock);
    310 
    311 	/* increment file count */
    312 	lfs_sb_addnfiles(fs, 1);
    313 
    314 	/* done */
    315 	lfs_segunlock(fs);
    316 	return 0;
    317 }
    318 
    319 /*
    320  * Allocate an inode for a new file, with given inode number and
    321  * version.
    322  *
    323  * Called in the same context as lfs_valloc and therefore shares the
    324  * same locking assumptions.
    325  *
    326  * XXX: WHICH MEANS IT OUGHT TO TAKE THE SEGLOCK WHILE FROBBING THIS
    327  * XXX: STUFF. REALLY.
    328  */
    329 int
    330 lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
    331 {
    332 	IFILE *ifp;
    333 	struct buf *bp, *cbp;
    334 	ino_t headino, thisino, oldnext;
    335 	CLEANERINFO *cip;
    336 
    337 	/* XXX: check for readonly */
    338 	/* XXX: assert no seglock */
    339 	/* XXX: should take seglock (as noted above) */
    340 
    341 	/*
    342 	 * If the ifile is too short to contain this inum, extend it.
    343 	 *
    344 	 * XXX: lfs_extend_ifile should take a size instead of always
    345 	 * doing just one block at time.
    346 	 */
    347 	while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
    348 		lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
    349 		<< lfs_sb_getbshift(fs)) {
    350 		lfs_extend_ifile(fs, NOCRED);
    351 	}
    352 
    353 	/*
    354 	 * fetch the ifile entry; get the inode freelist next pointer,
    355 	 * and set the version as directed.
    356 	 */
    357 	LFS_IENTRY(ifp, fs, ino, bp);
    358 	oldnext = lfs_if_getnextfree(fs, ifp);
    359 	lfs_if_setversion(fs, ifp, vers);
    360 	brelse(bp, 0);
    361 
    362 	/* Get head of inode freelist */
    363 	LFS_GET_HEADFREE(fs, cip, cbp, &headino);
    364 	if (headino == ino) {
    365 		/* Easy case: the inode we wanted was at the head */
    366 		LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
    367 	} else {
    368 		ino_t nextfree;
    369 
    370 		/* Have to find the desired inode in the freelist... */
    371 
    372 		thisino = headino;
    373 		while (1) {
    374 			/* read this ifile entry */
    375 			LFS_IENTRY(ifp, fs, thisino, bp);
    376 			nextfree = lfs_if_getnextfree(fs, ifp);
    377 			/* stop if we find it or we hit the end */
    378 			if (nextfree == ino ||
    379 			    nextfree == LFS_UNUSED_INUM)
    380 				break;
    381 			/* nope, keep going... */
    382 			thisino = nextfree;
    383 			brelse(bp, 0);
    384 		}
    385 		if (nextfree == LFS_UNUSED_INUM) {
    386 			/* hit the end -- this inode is not available */
    387 			brelse(bp, 0);
    388 			/* XXX release seglock (see above) */
    389 			return ENOENT;
    390 		}
    391 		/* found it; update the next pointer */
    392 		lfs_if_setnextfree(fs, ifp, oldnext);
    393 		/* write the ifile block */
    394 		LFS_BWRITE_LOG(bp);
    395 	}
    396 
    397 	/* done */
    398 	/* XXX release seglock (see above) */
    399 	return 0;
    400 }
    401 
#if 0
/*
 * Locate the highest-numbered inode that is not marked free in the
 * in-memory bitmap, scanning downward from the end of the ifile.  The
 * scan stops at LFS_UNUSED_INUM if every inode above it is free.
 * Meant to be used when shrinking the Ifile.
 */
static inline ino_t
lfs_last_alloc_ino(struct lfs *fs)
{
	ino_t cur, limit;

	/* Number of inodes the ifile currently describes. */
	limit = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
		lfs_sb_getifpb(fs);

	cur = limit - 1;
	while (cur > LFS_UNUSED_INUM && ISSET_BITMAP_FREE(fs, cur) != 0)
		--cur;

	return cur;
}
#endif
    422 
    423 /*
    424  * Find the previous (next lowest numbered) free inode, if any.
    425  * If there is none, return LFS_UNUSED_INUM.
    426  *
    427  * XXX: locking?
    428  */
    429 static inline ino_t
    430 lfs_freelist_prev(struct lfs *fs, ino_t ino)
    431 {
    432 	ino_t tino, bound, bb, freehdbb;
    433 
    434 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
    435 		/* No free inodes at all */
    436 		return LFS_UNUSED_INUM;
    437 	}
    438 
    439 	/* Search our own word first */
    440 	bound = ino & ~BMMASK;
    441 	for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
    442 		if (ISSET_BITMAP_FREE(fs, tino))
    443 			return tino;
    444 	/* If there are no lower words to search, just return */
    445 	if (ino >> BMSHIFT == 0)
    446 		return LFS_UNUSED_INUM;
    447 
    448 	/*
    449 	 * Find a word with a free inode in it.  We have to be a bit
    450 	 * careful here since ino_t is unsigned.
    451 	 */
    452 	freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
    453 	for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
    454 		if (fs->lfs_ino_bitmap[bb])
    455 			break;
    456 	if (fs->lfs_ino_bitmap[bb] == 0)
    457 		return LFS_UNUSED_INUM;
    458 
    459 	/* Search the word we found */
    460 	for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
    461 	     tino > LFS_UNUSED_INUM; tino--)
    462 		if (ISSET_BITMAP_FREE(fs, tino))
    463 			break;
    464 
    465 	/* Avoid returning reserved inode numbers */
    466 	if (tino <= LFS_IFILE_INUM)
    467 		tino = LFS_UNUSED_INUM;
    468 
    469 	return tino;
    470 }
    471 
    472 /*
    473  * Free an inode.
    474  *
    475  * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
    476  */
    477 /* ARGUSED */
    478 /* VOP_BWRITE 2i times */
    479 int
    480 lfs_vfree(struct vnode *vp, ino_t ino, int mode)
    481 {
    482 	SEGUSE *sup;
    483 	CLEANERINFO *cip;
    484 	struct buf *cbp, *bp;
    485 	IFILE *ifp;
    486 	struct inode *ip;
    487 	struct lfs *fs;
    488 	daddr_t old_iaddr;
    489 	ino_t otail;
    490 
    491 	/* Get the inode number and file system. */
    492 	ip = VTOI(vp);
    493 	fs = ip->i_lfs;
    494 	ino = ip->i_number;
    495 
    496 	/* XXX: assert not readonly */
    497 
    498 	ASSERT_NO_SEGLOCK(fs);
    499 	DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
    500 
    501 	/* Drain of pending writes */
    502 	mutex_enter(vp->v_interlock);
    503 	while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
    504 		cv_wait(&vp->v_cv, vp->v_interlock);
    505 	}
    506 	mutex_exit(vp->v_interlock);
    507 
    508 	lfs_seglock(fs, SEGM_PROT);
    509 
    510 	/*
    511 	 * If the inode was in a dirop, it isn't now.
    512 	 *
    513 	 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_flag & IN_ADIROP)
    514 	 * not updated together in one function? (and why do both exist,
    515 	 * anyway?)
    516 	 */
    517 	lfs_unmark_vnode(vp);
    518 
    519 	mutex_enter(&lfs_lock);
    520 	if (vp->v_uflag & VU_DIROP) {
    521 		vp->v_uflag &= ~VU_DIROP;
    522 		--lfs_dirvcount;
    523 		--fs->lfs_dirvcount;
    524 		TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
    525 		wakeup(&fs->lfs_dirvcount);
    526 		wakeup(&lfs_dirvcount);
    527 		mutex_exit(&lfs_lock);
    528 		vrele(vp);
    529 
    530 		/*
    531 		 * If this inode is not going to be written any more, any
    532 		 * segment accounting left over from its truncation needs
    533 		 * to occur at the end of the next dirops flush.  Attach
    534 		 * them to the fs-wide list for that purpose.
    535 		 */
    536 		if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
    537 			struct segdelta *sd;
    538 
    539 			while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
    540 				LIST_REMOVE(sd, list);
    541 				LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
    542 			}
    543 		}
    544 	} else {
    545 		/*
    546 		 * If it's not a dirop, we can finalize right away.
    547 		 */
    548 		mutex_exit(&lfs_lock);
    549 		lfs_finalize_ino_seguse(fs, ip);
    550 	}
    551 
    552 	/* it is no longer an unwritten inode, so update the counts */
    553 	mutex_enter(&lfs_lock);
    554 	LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
    555 	mutex_exit(&lfs_lock);
    556 
    557 	/* Turn off all inode modification flags */
    558 	ip->i_flag &= ~IN_ALLMOD;
    559 
    560 	/* Mark it deleted */
    561 	ip->i_lfs_iflags |= LFSI_DELETED;
    562 
    563 	/* Mark it free in the in-memory inode freemap */
    564 	SET_BITMAP_FREE(fs, ino);
    565 
    566 	/*
    567 	 * Set the ifile's inode entry to unused, increment its version number
    568 	 * and link it onto the free chain.
    569 	 */
    570 
    571 	/* fetch the ifile entry */
    572 	LFS_IENTRY(ifp, fs, ino, bp);
    573 
    574 	/* update the on-disk address (to "nowhere") */
    575 	old_iaddr = lfs_if_getdaddr(fs, ifp);
    576 	lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
    577 
    578 	/* bump the version */
    579 	lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
    580 
    581 	if (lfs_sb_getversion(fs) == 1) {
    582 		ino_t nextfree;
    583 
    584 		/* insert on freelist */
    585 		LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
    586 		lfs_if_setnextfree(fs, ifp, nextfree);
    587 		LFS_PUT_HEADFREE(fs, cip, cbp, ino);
    588 
    589 		/* write the ifile block */
    590 		(void) LFS_BWRITE_LOG(bp); /* Ifile */
    591 	} else {
    592 		ino_t tino, onf;
    593 
    594 		/*
    595 		 * Clear the freelist next pointer and write the ifile
    596 		 * block. XXX: why? I'm sure there must be a reason but
    597 		 * it seems both silly and dangerous.
    598 		 */
    599 		lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
    600 		(void) LFS_BWRITE_LOG(bp); /* Ifile */
    601 
    602 		/*
    603 		 * Insert on freelist in order.
    604 		 */
    605 
    606 		/* Find the next lower (by number) free inode */
    607 		tino = lfs_freelist_prev(fs, ino);
    608 
    609 		if (tino == LFS_UNUSED_INUM) {
    610 			ino_t nextfree;
    611 
    612 			/*
    613 			 * There isn't one; put us on the freelist head.
    614 			 */
    615 
    616 			/* reload the ifile block */
    617 			LFS_IENTRY(ifp, fs, ino, bp);
    618 			/* update the list */
    619 			LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
    620 			lfs_if_setnextfree(fs, ifp, nextfree);
    621 			LFS_PUT_HEADFREE(fs, cip, cbp, ino);
    622 			DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
    623 			     (long long)nextfree, (long long)ino));
    624 			/* write the ifile block */
    625 			LFS_BWRITE_LOG(bp); /* Ifile */
    626 
    627 			/* If the list was empty, set tail too */
    628 			LFS_GET_TAILFREE(fs, cip, cbp, &otail);
    629 			if (otail == LFS_UNUSED_INUM) {
    630 				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
    631 				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
    632 				      "-> %lld\n", (long long)otail,
    633 				      (long long)ino));
    634 			}
    635 		} else {
    636 			/*
    637 			 * Insert this inode into the list after tino.
    638 			 * We hold the segment lock so we don't have to
    639 			 * worry about blocks being written out of order.
    640 			 */
    641 
    642 			DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
    643 			      " after %lld\n", ino, tino));
    644 
    645 			/* load the previous inode's ifile block */
    646 			LFS_IENTRY(ifp, fs, tino, bp);
    647 			/* update the list pointer */
    648 			onf = lfs_if_getnextfree(fs, ifp);
    649 			lfs_if_setnextfree(fs, ifp, ino);
    650 			/* write the block */
    651 			LFS_BWRITE_LOG(bp);	/* Ifile */
    652 
    653 			/* load this inode's ifile block */
    654 			LFS_IENTRY(ifp, fs, ino, bp);
    655 			/* update the list pointer */
    656 			lfs_if_setnextfree(fs, ifp, onf);
    657 			/* write the block */
    658 			LFS_BWRITE_LOG(bp);	/* Ifile */
    659 
    660 			/* If we're last, put us on the tail */
    661 			if (onf == LFS_UNUSED_INUM) {
    662 				LFS_GET_TAILFREE(fs, cip, cbp, &otail);
    663 				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
    664 				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
    665 				      "-> %lld\n", (long long)otail,
    666 				      (long long)ino));
    667 			}
    668 		}
    669 	}
    670 #ifdef DIAGNOSTIC
    671 	/* XXX: shouldn't this check be further up *before* we trash the fs? */
    672 	if (ino == LFS_UNUSED_INUM) {
    673 		panic("inode 0 freed");
    674 	}
    675 #endif /* DIAGNOSTIC */
    676 
    677 	/*
    678 	 * Update the segment summary for the segment where the on-disk
    679 	 * copy used to be.
    680 	 */
    681 	if (old_iaddr != LFS_UNUSED_DADDR) {
    682 		/* load it */
    683 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
    684 #ifdef DIAGNOSTIC
    685 		/* the number of bytes in the segment should not become < 0 */
    686 		if (sup->su_nbytes < DINOSIZE(fs)) {
    687 			printf("lfs_vfree: negative byte count"
    688 			       " (segment %" PRIu32 " short by %d)\n",
    689 			       lfs_dtosn(fs, old_iaddr),
    690 			       (int)DINOSIZE(fs) -
    691 				    sup->su_nbytes);
    692 			panic("lfs_vfree: negative byte count");
    693 			sup->su_nbytes = DINOSIZE(fs);
    694 		}
    695 #endif
    696 		/* update the number of bytes in the segment */
    697 		sup->su_nbytes -= DINOSIZE(fs);
    698 		/* write the segment entry */
    699 		LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
    700 	}
    701 
    702 	/* Set superblock modified bit. */
    703 	mutex_enter(&lfs_lock);
    704 	fs->lfs_fmod = 1;
    705 	mutex_exit(&lfs_lock);
    706 
    707 	/* Decrement file count. */
    708 	lfs_sb_subnfiles(fs, 1);
    709 
    710 	lfs_segunlock(fs);
    711 
    712 	return (0);
    713 }
    714 
    715 /*
    716  * Sort the freelist and set up the free-inode bitmap.
    717  * To be called by lfs_mountfs().
    718  *
    719  * Takes the segmenet lock.
    720  */
    721 void
    722 lfs_order_freelist(struct lfs *fs)
    723 {
    724 	CLEANERINFO *cip;
    725 	IFILE *ifp = NULL;
    726 	struct buf *bp;
    727 	ino_t ino, firstino, lastino, maxino;
    728 #ifdef notyet
    729 	struct vnode *vp;
    730 #endif
    731 
    732 	ASSERT_NO_SEGLOCK(fs);
    733 	lfs_seglock(fs, SEGM_PROT);
    734 
    735 	/* largest inode on fs */
    736 	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
    737 		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
    738 
    739 	/* allocate the in-memory inode freemap */
    740 	/* XXX: assert that fs->lfs_ino_bitmap is null here */
    741 	fs->lfs_ino_bitmap =
    742 		malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
    743 		       M_SEGMENT, M_WAITOK | M_ZERO);
    744 	KASSERT(fs->lfs_ino_bitmap != NULL);
    745 
    746 	/*
    747 	 * Scan the ifile.
    748 	 */
    749 
    750 	firstino = lastino = LFS_UNUSED_INUM;
    751 	for (ino = 0; ino < maxino; ino++) {
    752 		/* Load this inode's ifile entry. */
    753 		if (ino % lfs_sb_getifpb(fs) == 0)
    754 			LFS_IENTRY(ifp, fs, ino, bp);
    755 		else
    756 			LFS_IENTRY_NEXT(ifp, fs);
    757 
    758 		/* Don't put zero or ifile on the free list */
    759 		if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
    760 			continue;
    761 
    762 #ifdef notyet
    763 		/*
    764 		 * Address orphaned files.
    765 		 *
    766 		 * The idea of this is to free inodes belonging to
    767 		 * files that were unlinked but not reclaimed, I guess
    768 		 * because if we're going to scan the whole ifile
    769 		 * anyway it costs very little to do this. I don't
    770 		 * immediately see any reason this should be disabled,
    771 		 * but presumably it doesn't work... not sure what
    772 		 * happens to such files currently. -- dholland 20160806
    773 		 */
    774 		if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE &&
    775 		    VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp) == 0) {
    776 			unsigned segno;
    777 
    778 			/* get the segment the inode in on disk  */
    779 			segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
    780 
    781 			/* truncate the inode */
    782 			lfs_truncate(vp, 0, 0, NOCRED);
    783 			vput(vp);
    784 
    785 			/* load the segment summary */
    786 			LFS_SEGENTRY(sup, fs, segno, bp);
    787 			/* update the number of bytes in the segment */
    788 			KASSERT(sup->su_nbytes >= DINOSIZE(fs));
    789 			sup->su_nbytes -= DINOSIZE(fs);
    790 			/* write the segment summary */
    791 			LFS_WRITESEGENTRY(sup, fs, segno, bp);
    792 
    793 			/* Drop the on-disk address */
    794 			lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
    795 			/* write the ifile entry */
    796 			LFS_BWRITE_LOG(bp);
    797 
    798 			/*
    799 			 * and reload it (XXX: why? I guess
    800 			 * LFS_BWRITE_LOG drops it...)
    801 			 */
    802 			LFS_IENTRY(ifp, fs, ino, bp);
    803 
    804 			/* Fall through to next if block */
    805 		}
    806 #endif
    807 
    808 		if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
    809 
    810 			/*
    811 			 * This inode is free. Put it on the free list.
    812 			 */
    813 
    814 			if (firstino == LFS_UNUSED_INUM) {
    815 				/* XXX: assert lastino == LFS_UNUSED_INUM? */
    816 				/* remember the first free inode */
    817 				firstino = ino;
    818 			} else {
    819 				/* release this inode's ifile entry */
    820 				brelse(bp, 0);
    821 
    822 				/* XXX: assert lastino != LFS_UNUSED_INUM? */
    823 
    824 				/* load lastino's ifile entry */
    825 				LFS_IENTRY(ifp, fs, lastino, bp);
    826 				/* set the list pointer */
    827 				lfs_if_setnextfree(fs, ifp, ino);
    828 				/* write the block */
    829 				LFS_BWRITE_LOG(bp);
    830 
    831 				/* reload this inode's ifile entry */
    832 				LFS_IENTRY(ifp, fs, ino, bp);
    833 			}
    834 			/* remember the last free inode seen so far */
    835 			lastino = ino;
    836 
    837 			/* Mark this inode free in the in-memory freemap */
    838 			SET_BITMAP_FREE(fs, ino);
    839 		}
    840 
    841 		/* If moving to the next ifile block, release the buffer. */
    842 		if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
    843 			brelse(bp, 0);
    844 	}
    845 
    846 	/* Write the freelist head and tail pointers */
    847 	/* XXX: do we need to mark the superblock dirty? */
    848 	LFS_PUT_HEADFREE(fs, cip, bp, firstino);
    849 	LFS_PUT_TAILFREE(fs, cip, bp, lastino);
    850 
    851 	/* done */
    852 	lfs_segunlock(fs);
    853 }
    854 
    855 /*
    856  * Mark a file orphaned (unlinked but not yet reclaimed) by inode
    857  * number. Do this with a magic freelist next pointer.
    858  *
    859  * XXX: howzabout some locking?
    860  */
    861 void
    862 lfs_orphan(struct lfs *fs, ino_t ino)
    863 {
    864 	IFILE *ifp;
    865 	struct buf *bp;
    866 
    867 	LFS_IENTRY(ifp, fs, ino, bp);
    868 	lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE);
    869 	LFS_BWRITE_LOG(bp);
    870 }
    871