Home | History | Annotate | Line # | Download | only in ext2fs
ext2fs_alloc.c revision 1.54
      1 /*	$NetBSD: ext2fs_alloc.c,v 1.54 2023/08/26 05:22:50 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  *
     31  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
     32  *  Modified for ext2fs by Manuel Bouyer.
     33  */
     34 
     35 /*
     36  * Copyright (c) 1997 Manuel Bouyer.
     37  *
     38  * Redistribution and use in source and binary forms, with or without
     39  * modification, are permitted provided that the following conditions
     40  * are met:
     41  * 1. Redistributions of source code must retain the above copyright
     42  *    notice, this list of conditions and the following disclaimer.
     43  * 2. Redistributions in binary form must reproduce the above copyright
     44  *    notice, this list of conditions and the following disclaimer in the
     45  *    documentation and/or other materials provided with the distribution.
     46  *
     47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     57  *
     58  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
     59  *  Modified for ext2fs by Manuel Bouyer.
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.54 2023/08/26 05:22:50 riastradh Exp $");
     64 
     65 #include <sys/param.h>
     66 #include <sys/systm.h>
     67 #include <sys/buf.h>
     68 #include <sys/proc.h>
     69 #include <sys/vnode.h>
     70 #include <sys/mount.h>
     71 #include <sys/kernel.h>
     72 #include <sys/syslog.h>
     73 #include <sys/kauth.h>
     74 
     75 #include <lib/libkern/crc16.h>
     76 
     77 #include <ufs/ufs/inode.h>
     78 #include <ufs/ufs/ufs_extern.h>
     79 #include <ufs/ufs/ufsmount.h>
     80 
     81 #include <ufs/ext2fs/ext2fs.h>
     82 #include <ufs/ext2fs/ext2fs_extern.h>
     83 
     84 u_long ext2gennumber;
     85 
     86 static daddr_t	ext2fs_alloccg(struct inode *, int, daddr_t, int);
     87 static u_long	ext2fs_dirpref(struct m_ext2fs *);
     88 static void	ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
     89 static u_long	ext2fs_hashalloc(struct inode *, int, long, int,
     90 		    daddr_t (*)(struct inode *, int, daddr_t, int));
     91 static daddr_t	ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
     92 static daddr_t	ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
     93 static __inline void	ext2fs_cg_update(struct m_ext2fs *, int,
     94     struct ext2_gd *, int, int, int, daddr_t);
     95 static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
     96 static void	ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *,
     97     char *);
     98 
     99 /*
    100  * Allocate a block in the file system.
    101  *
    102  * A preference may be optionally specified. If a preference is given
    103  * the following hierarchy is used to allocate a block:
    104  *   1) allocate the requested block.
    105  *   2) allocate a rotationally optimal block in the same cylinder.
    106  *   3) allocate a block in the same cylinder group.
    107  *   4) quadradically rehash into other cylinder groups, until an
    108  *	  available block is located.
    109  * If no block preference is given the following hierarchy is used
    110  * to allocate a block:
    111  *   1) allocate a block in the cylinder group that contains the
    112  *	  inode for the file.
    113  *   2) quadradically rehash into other cylinder groups, until an
    114  *	  available block is located.
    115  */
    116 int
    117 ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
    118     kauth_cred_t cred, daddr_t *bnp)
    119 {
    120 	struct m_ext2fs *fs;
    121 	daddr_t bno;
    122 	int cg;
    123 
    124 	*bnp = 0;
    125 	fs = ip->i_e2fs;
    126 #ifdef DIAGNOSTIC
    127 	if (cred == NOCRED)
    128 		panic("ext2fs_alloc: missing credential");
    129 #endif /* DIAGNOSTIC */
    130 	if (fs->e2fs.e2fs_fbcount == 0)
    131 		goto nospace;
    132 	if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
    133 	    NULL, NULL) != 0 &&
    134 	    freespace(fs) <= 0)
    135 		goto nospace;
    136 	if (bpref >= fs->e2fs.e2fs_bcount)
    137 		bpref = 0;
    138 	if (bpref == 0)
    139 		cg = ino_to_cg(fs, ip->i_number);
    140 	else
    141 		cg = dtog(fs, bpref);
    142 	bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
    143 	    ext2fs_alloccg);
    144 	if (bno > 0) {
    145 		ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
    146 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    147 		*bnp = bno;
    148 		return 0;
    149 	}
    150 nospace:
    151 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
    152 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
    153 	return ENOSPC;
    154 }
    155 
    156 /*
    157  * Allocate an inode in the file system.
    158  *
    159  * If allocating a directory, use ext2fs_dirpref to select the inode.
    160  * If allocating in a directory, the following hierarchy is followed:
    161  *   1) allocate the preferred inode.
    162  *   2) allocate an inode in the same cylinder group.
    163  *   3) quadradically rehash into other cylinder groups, until an
    164  *	  available inode is located.
    165  * If no inode preference is given the following hierarchy is used
    166  * to allocate an inode:
    167  *   1) allocate an inode in cylinder group 0.
    168  *   2) quadradically rehash into other cylinder groups, until an
    169  *	  available inode is located.
    170  */
    171 int
    172 ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
    173 {
    174 	struct inode *pip;
    175 	struct m_ext2fs *fs;
    176 	ino_t ino, ipref;
    177 	int cg;
    178 
    179 	pip = VTOI(pvp);
    180 	fs = pip->i_e2fs;
    181 	if (fs->e2fs.e2fs_ficount == 0)
    182 		goto noinodes;
    183 
    184 	if ((mode & IFMT) == IFDIR)
    185 		cg = ext2fs_dirpref(fs);
    186 	else
    187 		cg = ino_to_cg(fs, pip->i_number);
    188 	ipref = cg * fs->e2fs.e2fs_ipg + 1;
    189 	ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
    190 	if (ino == 0)
    191 		goto noinodes;
    192 
    193 	*inop = ino;
    194 	return 0;
    195 
    196 noinodes:
    197 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
    198 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
    199 	return ENOSPC;
    200 }
    201 
    202 /*
    203  * Find a cylinder to place a directory.
    204  *
    205  * The policy implemented by this algorithm is to select from
    206  * among those cylinder groups with above the average number of
    207  * free inodes, the one with the smallest number of directories.
    208  */
    209 static u_long
    210 ext2fs_dirpref(struct m_ext2fs *fs)
    211 {
    212 	int cg, maxspace, mincg, avgifree;
    213 
    214 	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
    215 	maxspace = 0;
    216 	mincg = -1;
    217 	for (cg = 0; cg < fs->e2fs_ncg; cg++) {
    218 		uint32_t nifree =
    219 		    (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16)
    220 		    | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree);
    221 		if (nifree < avgifree) {
    222 			continue;
    223 		}
    224 		uint32_t nbfree =
    225 		    (fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16)
    226 		    | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
    227 		if (mincg == -1 || nbfree > maxspace) {
    228 			mincg = cg;
    229 			maxspace = nbfree;
    230 		}
    231 	}
    232 	return mincg;
    233 }
    234 
    235 /*
    236  * Select the desired position for the next block in a file.  The file is
    237  * logically divided into sections. The first section is composed of the
    238  * direct blocks. Each additional section contains fs_maxbpg blocks.
    239  *
    240  * If no blocks have been allocated in the first section, the policy is to
    241  * request a block in the same cylinder group as the inode that describes
    242  * the file. Otherwise, the policy is to try to allocate the blocks
    243  * contigously. The two fields of the ext2 inode extension (see
    244  * ufs/ufs/inode.h) help this.
    245  */
    246 daddr_t
    247 ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
    248 		int32_t *bap /* XXX ondisk32 */)
    249 {
    250 	struct m_ext2fs *fs;
    251 	int cg, i;
    252 
    253 	fs = ip->i_e2fs;
    254 	/*
    255 	 * if we are doing contigous lbn allocation, try to alloc blocks
    256 	 * contigously on disk
    257 	 */
    258 
    259 	if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
    260 		return ip->i_e2fs_last_blk + 1;
    261 	}
    262 
    263 	/*
    264 	 * bap, if provided, gives us a list of blocks to which we want to
    265 	 * stay close
    266 	 */
    267 
    268 	if (bap) {
    269 		for (i = indx; i >= 0 ; i--) {
    270 			if (bap[i]) {
    271 				return fs2h32(bap[i]) + 1;
    272 			}
    273 		}
    274 	}
    275 
    276 	/* fall back to the first block of the cylinder containing the inode */
    277 
    278 	cg = ino_to_cg(fs, ip->i_number);
    279 	return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
    280 }
    281 
    282 /*
    283  * Implement the cylinder overflow algorithm.
    284  *
    285  * The policy implemented by this algorithm is:
    286  *   1) allocate the block in its requested cylinder group.
    287  *   2) quadradically rehash on the cylinder group number.
    288  *   3) brute force search for a free block.
    289  */
    290 static u_long
    291 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
    292 		daddr_t (*allocator)(struct inode *, int, daddr_t, int))
    293 {
    294 	struct m_ext2fs *fs;
    295 	long result;
    296 	int i, icg = cg;
    297 
    298 	fs = ip->i_e2fs;
    299 	/*
    300 	 * 1: preferred cylinder group
    301 	 */
    302 	result = (*allocator)(ip, cg, pref, size);
    303 	if (result)
    304 		return result;
    305 	/*
    306 	 * 2: quadratic rehash
    307 	 */
    308 	for (i = 1; i < fs->e2fs_ncg; i *= 2) {
    309 		cg += i;
    310 		if (cg >= fs->e2fs_ncg)
    311 			cg -= fs->e2fs_ncg;
    312 		result = (*allocator)(ip, cg, 0, size);
    313 		if (result)
    314 			return result;
    315 	}
    316 	/*
    317 	 * 3: brute force search
    318 	 * Note that we start at i == 2, since 0 was checked initially,
    319 	 * and 1 is always checked in the quadratic rehash.
    320 	 */
    321 	cg = (icg + 2) % fs->e2fs_ncg;
    322 	for (i = 2; i < fs->e2fs_ncg; i++) {
    323 		result = (*allocator)(ip, cg, 0, size);
    324 		if (result)
    325 			return result;
    326 		cg++;
    327 		if (cg == fs->e2fs_ncg)
    328 			cg = 0;
    329 	}
    330 	return 0;
    331 }
    332 
    333 /*
    334  * Determine whether a block can be allocated.
    335  *
    336  * Check to see if a block of the appropriate size is available,
    337  * and if it is, allocate it.
    338  */
    339 
    340 static daddr_t
    341 ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
    342 {
    343 	struct m_ext2fs *fs;
    344 	char *bbp;
    345 	struct buf *bp;
    346 	int error, bno, start, end, loc;
    347 
    348 	fs = ip->i_e2fs;
    349 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 &&
    350 	    fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0)
    351 		return 0;
    352 	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
    353 		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
    354 		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
    355 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    356 	if (error) {
    357 		return 0;
    358 	}
    359 	bbp = (char *)bp->b_data;
    360 
    361 	if (dtog(fs, bpref) != cg)
    362 		bpref = 0;
    363 
    364 	/* initialize block bitmap now if uninit */
    365 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
    366 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
    367 		ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
    368 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
    369 	}
    370 
    371 	if (bpref != 0) {
    372 		bpref = dtogd(fs, bpref);
    373 		/*
    374 		 * if the requested block is available, use it
    375 		 */
    376 		if (isclr(bbp, bpref)) {
    377 			bno = bpref;
    378 			goto gotit;
    379 		}
    380 	}
    381 	/*
    382 	 * no blocks in the requested cylinder, so take next
    383 	 * available one in this cylinder group.
    384 	 * first try to get 8 contigous blocks, then fall back to a single
    385 	 * block.
    386 	 */
    387 	if (bpref)
    388 		start = dtogd(fs, bpref) / NBBY;
    389 	else
    390 		start = 0;
    391 	end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
    392 	for (loc = start; loc < end; loc++) {
    393 		if (bbp[loc] == 0) {
    394 			bno = loc * NBBY;
    395 			goto gotit;
    396 		}
    397 	}
    398 	for (loc = 0; loc < start; loc++) {
    399 		if (bbp[loc] == 0) {
    400 			bno = loc * NBBY;
    401 			goto gotit;
    402 		}
    403 	}
    404 
    405 	bno = ext2fs_mapsearch(fs, bbp, bpref);
    406 #if 0
    407 	/*
    408 	 * XXX jdolecek mapsearch actually never fails, it panics instead.
    409 	 * If re-enabling, make sure to brele() before returning.
    410 	 */
    411 	if (bno < 0)
    412 		return 0;
    413 #endif
    414 gotit:
    415 #ifdef DIAGNOSTIC
    416 	if (isset(bbp, (daddr_t)bno)) {
    417 		printf("%s: cg=%d bno=%d fs=%s\n", __func__,
    418 		    cg, bno, fs->e2fs_fsmnt);
    419 		panic("ext2fs_alloccg: dup alloc");
    420 	}
    421 #endif
    422 	setbit(bbp, (daddr_t)bno);
    423 	fs->e2fs.e2fs_fbcount--;
    424 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
    425 	fs->e2fs_fmod = 1;
    426 	bdwrite(bp);
    427 	return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
    428 }
    429 
    430 /*
    431  * Determine whether an inode can be allocated.
    432  *
    433  * Check to see if an inode is available, and if it is,
    434  * allocate it using the following policy:
    435  *   1) allocate the requested inode.
    436  *   2) allocate the next available inode after the requested
    437  *	  inode in the specified cylinder group.
    438  */
    439 static daddr_t
    440 ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
    441 {
    442 	struct m_ext2fs *fs;
    443 	char *ibp;
    444 	struct buf *bp;
    445 	int error, start, len, loc, map, i;
    446 
    447 	ipref--; /* to avoid a lot of (ipref -1) */
    448 	if (ipref == -1)
    449 		ipref = 0;
    450 	fs = ip->i_e2fs;
    451 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 ||
    452 	    fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0)
    453 		return 0;
    454 	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
    455 		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
    456 		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
    457 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    458 	if (error) {
    459 		return 0;
    460 	}
    461 	ibp = (char *)bp->b_data;
    462 
    463 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
    464 
    465 	/* initialize inode bitmap now if uninit */
    466 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
    467 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
    468 		KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
    469 		memset(ibp, 0, fs->e2fs_bsize);
    470 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
    471 	}
    472 
    473 	if (ipref) {
    474 		ipref %= fs->e2fs.e2fs_ipg;
    475 		if (isclr(ibp, ipref))
    476 			goto gotit;
    477 	}
    478 	start = ipref / NBBY;
    479 	len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
    480 	loc = skpc(0xff, len, &ibp[start]);
    481 	if (loc == 0) {
    482 		len = start + 1;
    483 		start = 0;
    484 		loc = skpc(0xff, len, &ibp[0]);
    485 		if (loc == 0) {
    486 			printf("%s: cg = %d, ipref = %lld, fs = %s\n", __func__,
    487 			    cg, (long long)ipref, fs->e2fs_fsmnt);
    488 			panic("%s: map corrupted", __func__);
    489 			/* NOTREACHED */
    490 		}
    491 	}
    492 	i = start + len - loc;
    493 	map = ibp[i] ^ 0xff;
    494 	if (map == 0) {
    495 		printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
    496 		panic("%s: inode not in map", __func__);
    497 	}
    498 	ipref = i * NBBY + ffs(map) - 1;
    499 gotit:
    500 	setbit(ibp, ipref);
    501 	fs->e2fs.e2fs_ficount--;
    502 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
    503 		0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
    504 	fs->e2fs_fmod = 1;
    505 	bdwrite(bp);
    506 	return cg * fs->e2fs.e2fs_ipg + ipref + 1;
    507 }
    508 
    509 /*
    510  * Free a block.
    511  *
    512  * The specified block is placed back in the
    513  * free map.
    514  */
    515 void
    516 ext2fs_blkfree(struct inode *ip, daddr_t bno)
    517 {
    518 	struct m_ext2fs *fs;
    519 	char *bbp;
    520 	struct buf *bp;
    521 	int error, cg;
    522 
    523 	fs = ip->i_e2fs;
    524 	cg = dtog(fs, bno);
    525 
    526 	KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
    527 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
    528 
    529 	if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
    530 		printf("%s: bad block %jd, ino %ju\n",
    531 		    __func__, (intmax_t)bno, (uintmax_t)ip->i_number);
    532 		ext2fs_fserr(fs, ip->i_uid, "bad block");
    533 		return;
    534 	}
    535 	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
    536 	    fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
    537 	    fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
    538 	    (int)fs->e2fs_bsize, B_MODIFY, &bp);
    539 	if (error) {
    540 		return;
    541 	}
    542 	bbp = (char *)bp->b_data;
    543 	bno = dtogd(fs, bno);
    544 	if (isclr(bbp, bno)) {
    545 		printf("%s: dev = %#jx, block = %jd, fs = %s\n", __func__,
    546 		    (uintmax_t)ip->i_dev, (intmax_t)bno,
    547 		    fs->e2fs_fsmnt);
    548 		panic("%s: freeing free block", __func__);
    549 	}
    550 	clrbit(bbp, bno);
    551 	fs->e2fs.e2fs_fbcount++;
    552 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
    553 	fs->e2fs_fmod = 1;
    554 	bdwrite(bp);
    555 }
    556 
    557 /*
    558  * Free an inode.
    559  *
    560  * The specified inode is placed back in the free map.
    561  */
    562 int
    563 ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
    564 {
    565 	struct m_ext2fs *fs;
    566 	char *ibp;
    567 	struct inode *pip;
    568 	struct buf *bp;
    569 	int error, cg;
    570 
    571 	pip = VTOI(pvp);
    572 	fs = pip->i_e2fs;
    573 
    574 	if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
    575 		panic("%s: range: dev = %#jx, ino = %ju, fs = %s",
    576 		    __func__, (uintmax_t)pip->i_dev, (uintmax_t)ino,
    577 		    fs->e2fs_fsmnt);
    578 
    579 	cg = ino_to_cg(fs, ino);
    580 
    581 	KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
    582 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
    583 
    584 	error = bread(pip->i_devvp, EXT2_FSBTODB64(fs,
    585 	    fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
    586 	    fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
    587 	    (int)fs->e2fs_bsize, B_MODIFY, &bp);
    588 	if (error) {
    589 		return 0;
    590 	}
    591 	ibp = (char *)bp->b_data;
    592 	ino = (ino - 1) % fs->e2fs.e2fs_ipg;
    593 	if (isclr(ibp, ino)) {
    594 		printf("%s: dev = %#jx, ino = %ju, fs = %s\n", __func__,
    595 		    (uintmax_t)pip->i_dev,
    596 		    (uintmax_t)ino, fs->e2fs_fsmnt);
    597 		if (fs->e2fs_ronly == 0)
    598 			panic("%s: freeing free inode", __func__);
    599 	}
    600 	clrbit(ibp, ino);
    601 	fs->e2fs.e2fs_ficount++;
    602 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
    603 		0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
    604 	fs->e2fs_fmod = 1;
    605 	bdwrite(bp);
    606 	return 0;
    607 }
    608 
    609 /*
    610  * Find a block in the specified cylinder group.
    611  *
    612  * It is a panic if a request is made to find a block if none are
    613  * available.
    614  */
    615 
    616 static daddr_t
    617 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
    618 {
    619 	int start, len, loc, i, map;
    620 
    621 	/*
    622 	 * find the fragment by searching through the free block
    623 	 * map for an appropriate bit pattern
    624 	 */
    625 	if (bpref)
    626 		start = dtogd(fs, bpref) / NBBY;
    627 	else
    628 		start = 0;
    629 	len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
    630 	loc = skpc(0xff, len, &bbp[start]);
    631 	if (loc == 0) {
    632 		len = start + 1;
    633 		start = 0;
    634 		loc = skpc(0xff, len, &bbp[start]);
    635 		if (loc == 0) {
    636 			printf("%s: start = %d, len = %d, fs = %s\n",
    637 			    __func__, start, len, fs->e2fs_fsmnt);
    638 			panic("%s: map corrupted", __func__);
    639 			/* NOTREACHED */
    640 		}
    641 	}
    642 	i = start + len - loc;
    643 	map = bbp[i] ^ 0xff;
    644 	if (map == 0) {
    645 		printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
    646 		panic("%s: block not in map", __func__);
    647 	}
    648 	return i * NBBY + ffs(map) - 1;
    649 }
    650 
    651 /*
    652  * Fserr prints the name of a file system with an error diagnostic.
    653  *
    654  * The form of the error message is:
    655  *	fs: error message
    656  */
    657 static void
    658 ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
    659 {
    660 
    661 	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
    662 }
    663 
    664 static __inline void
    665 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
    666 {
    667 	if (nifree) {
    668 		uint32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) |
    669 		    (fs2h16(gd->ext2bgd_nifree_hi) << 16);
    670 		ext2bgd_nifree += nifree;
    671 		gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree);
    672 		gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16);
    673 		/*
    674 		 * If we allocated inode on bigger offset than what was
    675 		 * ever used before, bump the itable_unused count. This
    676 		 * member only ever grows, and is used only for initialization
    677 		 * !INODE_ZEROED groups with used inodes. Of course, by the
    678 		 * time we get here the itables are already zeroed, but
    679 		 * e2fstools fsck.ext4 still checks this.
    680 		 */
    681 		if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 &&
    682 		    (ioff + 1) >= (fs->e2fs.e2fs_ipg -
    683 		    fs2h16(gd->ext2bgd_itable_unused_lo))) {
    684 			gd->ext2bgd_itable_unused_lo =
    685 			    h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
    686 		}
    687 
    688 		KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
    689 		    gd->ext2bgd_itable_unused_lo <= ext2bgd_nifree);
    690 	}
    691 
    692 
    693 	if (nbfree) {
    694 		uint32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree)
    695 		    | (fs2h16(gd->ext2bgd_nbfree_hi) << 16);
    696 		ext2bgd_nbfree += nbfree;
    697 		gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree);
    698 		gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16);
    699 	}
    700 
    701 	if (ndirs) {
    702 		uint32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs)
    703 		    | (fs2h16(gd->ext2bgd_ndirs_hi) << 16);
    704 		ext2bgd_ndirs += ndirs;
    705 		gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs);
    706 		gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16);
    707 	}
    708 
    709 	if (E2FS_HAS_GD_CSUM(fs))
    710 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
    711 }
    712 
    713 /*
    714  * Compute group description csum. Structure data must be LE (not host).
    715  * Returned as LE (disk encoding).
    716  */
    717 static uint16_t
    718 ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
    719 {
    720 	uint16_t crc;
    721 	size_t cgsize = 1 << fs->e2fs_group_desc_shift;
    722 	uint32_t cg_bswapped = h2fs32((uint32_t)cg);
    723 	size_t off;
    724 
    725 	if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
    726 		return 0;
    727 
    728 	off = offsetof(struct ext2_gd, ext2bgd_checksum);
    729 
    730 	crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid, sizeof(fs->e2fs.e2fs_uuid));
    731 	crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
    732 	crc = crc16(crc, (uint8_t *)gd, off);
    733 	crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2));
    734 
    735 	return h2fs16(crc);
    736 }
    737 
    738 static void
    739 ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
    740 {
    741 	int i;
    742 
    743 	memset(bbp, 0, fs->e2fs_bsize);
    744 
    745 	/*
    746 	 * No block was ever allocated on this cg before, so the only used
    747 	 * blocks are metadata blocks on start of the group. We could optimize
    748 	 * this to set by bytes, but since this is done once per the group
    749 	 * in lifetime of filesystem, it really is not worth it.
    750 	 */
    751 	for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
    752 		setbit(bbp, i);
    753 }
    754 
    755 /*
    756  * Verify csum and initialize itable if not done already
    757  */
    758 int
    759 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
    760 {
    761 	struct ext2_gd *gd;
    762 	ino_t ioff;
    763 	size_t boff;
    764 	struct buf *bp;
    765 	int cg, i, error;
    766 
    767 	if (!E2FS_HAS_GD_CSUM(fs))
    768 		return 0;
    769 
    770 	for(cg = 0; cg < fs->e2fs_ncg; cg++) {
    771 		gd = &fs->e2fs_gd[cg];
    772 
    773 		/* Verify checksum */
    774 		uint16_t csum = ext2fs_cg_get_csum(fs, cg, gd);
    775 		if (gd->ext2bgd_checksum != csum) {
    776 			printf("%s: group %d invalid csum (%#x != %#x)\n",
    777 			    __func__, cg, gd->ext2bgd_checksum, csum);
    778 			return EINVAL;
    779 		}
    780 
    781 		/* if mounting read-write, zero itable if not already done */
    782 		if (ronly ||
    783 		    (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
    784 			continue;
    785 
    786 		/*
    787 		 * We are skipping already used inodes, zero rest of itable
    788 		 * blocks. First block to zero could be only partial wipe, all
    789 		 * others are wiped completely. This might take a while,
    790 		 * there could be many inode table blocks. We use
    791 		 * delayed writes, so this shouldn't block for very
    792 		 * long.
    793 		 */
    794 		ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
    795 		boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
    796 
    797 		for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
    798 			if (boff) {
    799 				/* partial wipe, must read old data */
    800 				error = bread(devvp, EXT2_FSBTODB64OFF(fs,
    801 				    fs2h32(gd->ext2bgd_i_tables),
    802 				    fs2h32(gd->ext2bgd_i_tables_hi), i),
    803 				    (int)fs->e2fs_bsize, B_MODIFY, &bp);
    804 				if (error) {
    805 					printf("%s: can't read itable block",
    806 					    __func__);
    807 					return error;
    808 				}
    809 				memset((char *)bp->b_data + boff, 0,
    810 				    fs->e2fs_bsize - boff);
    811 				boff = 0;
    812 			} else {
    813 				/*
    814 				 * Complete wipe, don't need to read data. This
    815 				 * assumes nothing else is changing the data.
    816 				 */
    817 				bp = getblk(devvp, EXT2_FSBTODB64OFF(fs,
    818 				    fs2h32(gd->ext2bgd_i_tables),
    819 				    fs2h32(gd->ext2bgd_i_tables_hi), i),
    820 				    (int)fs->e2fs_bsize, 0, 0);
    821 				clrbuf(bp);
    822 			}
    823 
    824 			bdwrite(bp);
    825 		}
    826 
    827 		gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
    828 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
    829 		fs->e2fs_fmod = 1;
    830 	}
    831 
    832 	return 0;
    833 }
    834