Home | History | Annotate | Line # | Download | only in ext2fs
ext2fs_alloc.c revision 1.42.14.3
      1 /*	$NetBSD: ext2fs_alloc.c,v 1.42.14.3 2017/12/03 11:39:21 jdolecek Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  *
     31  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
     32  *  Modified for ext2fs by Manuel Bouyer.
     33  */
     34 
     35 /*
     36  * Copyright (c) 1997 Manuel Bouyer.
     37  *
     38  * Redistribution and use in source and binary forms, with or without
     39  * modification, are permitted provided that the following conditions
     40  * are met:
     41  * 1. Redistributions of source code must retain the above copyright
     42  *    notice, this list of conditions and the following disclaimer.
     43  * 2. Redistributions in binary form must reproduce the above copyright
     44  *    notice, this list of conditions and the following disclaimer in the
     45  *    documentation and/or other materials provided with the distribution.
     46  *
     47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     57  *
     58  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
     59  *  Modified for ext2fs by Manuel Bouyer.
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.42.14.3 2017/12/03 11:39:21 jdolecek Exp $");
     64 
     65 #include <sys/param.h>
     66 #include <sys/systm.h>
     67 #include <sys/buf.h>
     68 #include <sys/proc.h>
     69 #include <sys/vnode.h>
     70 #include <sys/mount.h>
     71 #include <sys/kernel.h>
     72 #include <sys/syslog.h>
     73 #include <sys/kauth.h>
     74 
     75 #include <lib/libkern/crc16.h>
     76 
     77 #include <ufs/ufs/inode.h>
     78 #include <ufs/ufs/ufs_extern.h>
     79 #include <ufs/ufs/ufsmount.h>
     80 
     81 #include <ufs/ext2fs/ext2fs.h>
     82 #include <ufs/ext2fs/ext2fs_extern.h>
     83 
     84 u_long ext2gennumber;
     85 
     86 static daddr_t	ext2fs_alloccg(struct inode *, int, daddr_t, int);
     87 static u_long	ext2fs_dirpref(struct m_ext2fs *);
     88 static void	ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
     89 static u_long	ext2fs_hashalloc(struct inode *, int, long, int,
     90 		    daddr_t (*)(struct inode *, int, daddr_t, int));
     91 static daddr_t	ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
     92 static daddr_t	ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
     93 static __inline void	ext2fs_cg_update(struct m_ext2fs *, int, struct ext2_gd *, int, int, int, daddr_t);
     94 static uint16_t 	ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
     95 static void		ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *, char *);
     96 
     97 /*
     98  * Allocate a block in the file system.
     99  *
    100  * A preference may be optionally specified. If a preference is given
    101  * the following hierarchy is used to allocate a block:
    102  *   1) allocate the requested block.
    103  *   2) allocate a rotationally optimal block in the same cylinder.
    104  *   3) allocate a block in the same cylinder group.
    105  *   4) quadradically rehash into other cylinder groups, until an
    106  *	  available block is located.
    107  * If no block preference is given the following hierarchy is used
    108  * to allocate a block:
    109  *   1) allocate a block in the cylinder group that contains the
    110  *	  inode for the file.
    111  *   2) quadradically rehash into other cylinder groups, until an
    112  *	  available block is located.
    113  */
    114 int
    115 ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
    116     kauth_cred_t cred, daddr_t *bnp)
    117 {
    118 	struct m_ext2fs *fs;
    119 	daddr_t bno;
    120 	int cg;
    121 
    122 	*bnp = 0;
    123 	fs = ip->i_e2fs;
    124 #ifdef DIAGNOSTIC
    125 	if (cred == NOCRED)
    126 		panic("ext2fs_alloc: missing credential");
    127 #endif /* DIAGNOSTIC */
    128 	if (fs->e2fs.e2fs_fbcount == 0)
    129 		goto nospace;
    130 	if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
    131 	    NULL, NULL) != 0 &&
    132 	    freespace(fs) <= 0)
    133 		goto nospace;
    134 	if (bpref >= fs->e2fs.e2fs_bcount)
    135 		bpref = 0;
    136 	if (bpref == 0)
    137 		cg = ino_to_cg(fs, ip->i_number);
    138 	else
    139 		cg = dtog(fs, bpref);
    140 	bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
    141 	    ext2fs_alloccg);
    142 	if (bno > 0) {
    143 		ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
    144 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    145 		*bnp = bno;
    146 		return 0;
    147 	}
    148 nospace:
    149 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
    150 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
    151 	return ENOSPC;
    152 }
    153 
    154 /*
    155  * Allocate an inode in the file system.
    156  *
    157  * If allocating a directory, use ext2fs_dirpref to select the inode.
    158  * If allocating in a directory, the following hierarchy is followed:
    159  *   1) allocate the preferred inode.
    160  *   2) allocate an inode in the same cylinder group.
    161  *   3) quadradically rehash into other cylinder groups, until an
    162  *	  available inode is located.
    163  * If no inode preference is given the following hierarchy is used
    164  * to allocate an inode:
    165  *   1) allocate an inode in cylinder group 0.
    166  *   2) quadradically rehash into other cylinder groups, until an
    167  *	  available inode is located.
    168  */
    169 int
    170 ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
    171 {
    172 	struct inode *pip;
    173 	struct m_ext2fs *fs;
    174 	ino_t ino, ipref;
    175 	int cg;
    176 
    177 	pip = VTOI(pvp);
    178 	fs = pip->i_e2fs;
    179 	if (fs->e2fs.e2fs_ficount == 0)
    180 		goto noinodes;
    181 
    182 	if ((mode & IFMT) == IFDIR)
    183 		cg = ext2fs_dirpref(fs);
    184 	else
    185 		cg = ino_to_cg(fs, pip->i_number);
    186 	ipref = cg * fs->e2fs.e2fs_ipg + 1;
    187 	ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
    188 	if (ino == 0)
    189 		goto noinodes;
    190 
    191 	*inop = ino;
    192 	return 0;
    193 
    194 noinodes:
    195 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
    196 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
    197 	return ENOSPC;
    198 }
    199 
    200 /*
    201  * Find a cylinder to place a directory.
    202  *
    203  * The policy implemented by this algorithm is to select from
    204  * among those cylinder groups with above the average number of
    205  * free inodes, the one with the smallest number of directories.
    206  */
    207 static u_long
    208 ext2fs_dirpref(struct m_ext2fs *fs)
    209 {
    210 	int cg, maxspace, mincg, avgifree;
    211 
    212 	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
    213 	maxspace = 0;
    214 	mincg = -1;
    215 	for (cg = 0; cg < fs->e2fs_ncg; cg++)
    216 		if (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) >= avgifree) {
    217 			if (mincg == -1 || fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree) > maxspace) {
    218 				mincg = cg;
    219 				maxspace = fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
    220 			}
    221 		}
    222 	return mincg;
    223 }
    224 
    225 /*
    226  * Select the desired position for the next block in a file.  The file is
    227  * logically divided into sections. The first section is composed of the
    228  * direct blocks. Each additional section contains fs_maxbpg blocks.
    229  *
    230  * If no blocks have been allocated in the first section, the policy is to
    231  * request a block in the same cylinder group as the inode that describes
    232  * the file. Otherwise, the policy is to try to allocate the blocks
    233  * contigously. The two fields of the ext2 inode extension (see
    234  * ufs/ufs/inode.h) help this.
    235  */
    236 daddr_t
    237 ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
    238 		int32_t *bap /* XXX ondisk32 */)
    239 {
    240 	struct m_ext2fs *fs;
    241 	int cg, i;
    242 
    243 	fs = ip->i_e2fs;
    244 	/*
    245 	 * if we are doing contigous lbn allocation, try to alloc blocks
    246 	 * contigously on disk
    247 	 */
    248 
    249 	if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
    250 		return ip->i_e2fs_last_blk + 1;
    251 	}
    252 
    253 	/*
    254 	 * bap, if provided, gives us a list of blocks to which we want to
    255 	 * stay close
    256 	 */
    257 
    258 	if (bap) {
    259 		for (i = indx; i >= 0 ; i--) {
    260 			if (bap[i]) {
    261 				return fs2h32(bap[i]) + 1;
    262 			}
    263 		}
    264 	}
    265 
    266 	/* fall back to the first block of the cylinder containing the inode */
    267 
    268 	cg = ino_to_cg(fs, ip->i_number);
    269 	return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
    270 }
    271 
    272 /*
    273  * Implement the cylinder overflow algorithm.
    274  *
    275  * The policy implemented by this algorithm is:
    276  *   1) allocate the block in its requested cylinder group.
    277  *   2) quadradically rehash on the cylinder group number.
    278  *   3) brute force search for a free block.
    279  */
    280 static u_long
    281 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
    282 		daddr_t (*allocator)(struct inode *, int, daddr_t, int))
    283 {
    284 	struct m_ext2fs *fs;
    285 	long result;
    286 	int i, icg = cg;
    287 
    288 	fs = ip->i_e2fs;
    289 	/*
    290 	 * 1: preferred cylinder group
    291 	 */
    292 	result = (*allocator)(ip, cg, pref, size);
    293 	if (result)
    294 		return result;
    295 	/*
    296 	 * 2: quadratic rehash
    297 	 */
    298 	for (i = 1; i < fs->e2fs_ncg; i *= 2) {
    299 		cg += i;
    300 		if (cg >= fs->e2fs_ncg)
    301 			cg -= fs->e2fs_ncg;
    302 		result = (*allocator)(ip, cg, 0, size);
    303 		if (result)
    304 			return result;
    305 	}
    306 	/*
    307 	 * 3: brute force search
    308 	 * Note that we start at i == 2, since 0 was checked initially,
    309 	 * and 1 is always checked in the quadratic rehash.
    310 	 */
    311 	cg = (icg + 2) % fs->e2fs_ncg;
    312 	for (i = 2; i < fs->e2fs_ncg; i++) {
    313 		result = (*allocator)(ip, cg, 0, size);
    314 		if (result)
    315 			return result;
    316 		cg++;
    317 		if (cg == fs->e2fs_ncg)
    318 			cg = 0;
    319 	}
    320 	return 0;
    321 }
    322 
    323 /*
    324  * Determine whether a block can be allocated.
    325  *
    326  * Check to see if a block of the appropriate size is available,
    327  * and if it is, allocate it.
    328  */
    329 
    330 static daddr_t
    331 ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
    332 {
    333 	struct m_ext2fs *fs;
    334 	char *bbp;
    335 	struct buf *bp;
    336 	/* XXX ondisk32 */
    337 	int error, bno, start, end, loc;
    338 
    339 	fs = ip->i_e2fs;
    340 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
    341 		return 0;
    342 	error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
    343 		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
    344 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    345 	if (error) {
    346 		return 0;
    347 	}
    348 	bbp = (char *)bp->b_data;
    349 
    350 	if (dtog(fs, bpref) != cg)
    351 		bpref = 0;
    352 
    353 	/* initialize block bitmap now if uninit */
    354 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
    355 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
    356 		ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
    357 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
    358 	}
    359 
    360 	if (bpref != 0) {
    361 		bpref = dtogd(fs, bpref);
    362 		/*
    363 		 * if the requested block is available, use it
    364 		 */
    365 		if (isclr(bbp, bpref)) {
    366 			bno = bpref;
    367 			goto gotit;
    368 		}
    369 	}
    370 	/*
    371 	 * no blocks in the requested cylinder, so take next
    372 	 * available one in this cylinder group.
    373 	 * first try to get 8 contigous blocks, then fall back to a single
    374 	 * block.
    375 	 */
    376 	if (bpref)
    377 		start = dtogd(fs, bpref) / NBBY;
    378 	else
    379 		start = 0;
    380 	end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
    381 	for (loc = start; loc < end; loc++) {
    382 		if (bbp[loc] == 0) {
    383 			bno = loc * NBBY;
    384 			goto gotit;
    385 		}
    386 	}
    387 	for (loc = 0; loc < start; loc++) {
    388 		if (bbp[loc] == 0) {
    389 			bno = loc * NBBY;
    390 			goto gotit;
    391 		}
    392 	}
    393 
    394 	bno = ext2fs_mapsearch(fs, bbp, bpref);
    395 #if 0
    396 	/*
    397 	 * XXX jdolecek mapsearch actually never fails, it panics instead.
    398 	 * If re-enabling, make sure to brele() before returning.
    399 	 */
    400 	if (bno < 0)
    401 		return 0;
    402 #endif
    403 gotit:
    404 #ifdef DIAGNOSTIC
    405 	if (isset(bbp, (daddr_t)bno)) {
    406 		printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
    407 			cg, bno, fs->e2fs_fsmnt);
    408 		panic("ext2fs_alloccg: dup alloc");
    409 	}
    410 #endif
    411 	setbit(bbp, (daddr_t)bno);
    412 	fs->e2fs.e2fs_fbcount--;
    413 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
    414 	fs->e2fs_fmod = 1;
    415 	bdwrite(bp);
    416 	return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
    417 }
    418 
    419 /*
    420  * Determine whether an inode can be allocated.
    421  *
    422  * Check to see if an inode is available, and if it is,
    423  * allocate it using the following policy:
    424  *   1) allocate the requested inode.
    425  *   2) allocate the next available inode after the requested
    426  *	  inode in the specified cylinder group.
    427  */
    428 static daddr_t
    429 ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
    430 {
    431 	struct m_ext2fs *fs;
    432 	char *ibp;
    433 	struct buf *bp;
    434 	int error, start, len, loc, map, i;
    435 
    436 	ipref--; /* to avoid a lot of (ipref -1) */
    437 	if (ipref == -1)
    438 		ipref = 0;
    439 	fs = ip->i_e2fs;
    440 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
    441 		return 0;
    442 	error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
    443 		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
    444 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    445 	if (error) {
    446 		return 0;
    447 	}
    448 	ibp = (char *)bp->b_data;
    449 
    450 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
    451 
    452 	/* initialize inode bitmap now if uninit */
    453 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
    454 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
    455 		KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
    456 		memset(ibp, 0, fs->e2fs_bsize);
    457 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
    458 	}
    459 
    460 	if (ipref) {
    461 		ipref %= fs->e2fs.e2fs_ipg;
    462 		if (isclr(ibp, ipref))
    463 			goto gotit;
    464 	}
    465 	start = ipref / NBBY;
    466 	len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
    467 	loc = skpc(0xff, len, &ibp[start]);
    468 	if (loc == 0) {
    469 		len = start + 1;
    470 		start = 0;
    471 		loc = skpc(0xff, len, &ibp[0]);
    472 		if (loc == 0) {
    473 			printf("cg = %d, ipref = %lld, fs = %s\n",
    474 				cg, (long long)ipref, fs->e2fs_fsmnt);
    475 			panic("ext2fs_nodealloccg: map corrupted");
    476 			/* NOTREACHED */
    477 		}
    478 	}
    479 	i = start + len - loc;
    480 	map = ibp[i] ^ 0xff;
    481 	if (map == 0) {
    482 		printf("fs = %s\n", fs->e2fs_fsmnt);
    483 		panic("ext2fs_nodealloccg: inode not in map");
    484 	}
    485 	ipref = i * NBBY + ffs(map) - 1;
    486 gotit:
    487 	setbit(ibp, ipref);
    488 	fs->e2fs.e2fs_ficount--;
    489 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
    490 		0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
    491 	fs->e2fs_fmod = 1;
    492 	bdwrite(bp);
    493 	return cg * fs->e2fs.e2fs_ipg + ipref + 1;
    494 }
    495 
    496 /*
    497  * Free a block.
    498  *
    499  * The specified block is placed back in the
    500  * free map.
    501  */
    502 void
    503 ext2fs_blkfree(struct inode *ip, daddr_t bno)
    504 {
    505 	struct m_ext2fs *fs;
    506 	char *bbp;
    507 	struct buf *bp;
    508 	int error, cg;
    509 
    510 	fs = ip->i_e2fs;
    511 	cg = dtog(fs, bno);
    512 
    513 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
    514 
    515 	if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
    516 		printf("bad block %lld, ino %llu\n", (long long)bno,
    517 		    (unsigned long long)ip->i_number);
    518 		ext2fs_fserr(fs, ip->i_uid, "bad block");
    519 		return;
    520 	}
    521 	error = bread(ip->i_devvp,
    522 		EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
    523 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    524 	if (error) {
    525 		return;
    526 	}
    527 	bbp = (char *)bp->b_data;
    528 	bno = dtogd(fs, bno);
    529 	if (isclr(bbp, bno)) {
    530 		printf("dev = 0x%llx, block = %lld, fs = %s\n",
    531 		    (unsigned long long)ip->i_dev, (long long)bno,
    532 		    fs->e2fs_fsmnt);
    533 		panic("blkfree: freeing free block");
    534 	}
    535 	clrbit(bbp, bno);
    536 	fs->e2fs.e2fs_fbcount++;
    537 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
    538 	fs->e2fs_fmod = 1;
    539 	bdwrite(bp);
    540 }
    541 
    542 /*
    543  * Free an inode.
    544  *
    545  * The specified inode is placed back in the free map.
    546  */
    547 int
    548 ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
    549 {
    550 	struct m_ext2fs *fs;
    551 	char *ibp;
    552 	struct inode *pip;
    553 	struct buf *bp;
    554 	int error, cg;
    555 
    556 	pip = VTOI(pvp);
    557 	fs = pip->i_e2fs;
    558 
    559 	if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
    560 		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
    561 		    (unsigned long long)pip->i_dev, (unsigned long long)ino,
    562 		    fs->e2fs_fsmnt);
    563 
    564 	cg = ino_to_cg(fs, ino);
    565 
    566 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
    567 
    568 	error = bread(pip->i_devvp,
    569 		EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
    570 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    571 	if (error) {
    572 		return 0;
    573 	}
    574 	ibp = (char *)bp->b_data;
    575 	ino = (ino - 1) % fs->e2fs.e2fs_ipg;
    576 	if (isclr(ibp, ino)) {
    577 		printf("dev = 0x%llx, ino = %llu, fs = %s\n",
    578 		    (unsigned long long)pip->i_dev,
    579 		    (unsigned long long)ino, fs->e2fs_fsmnt);
    580 		if (fs->e2fs_ronly == 0)
    581 			panic("ifree: freeing free inode");
    582 	}
    583 	clrbit(ibp, ino);
    584 	fs->e2fs.e2fs_ficount++;
    585 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
    586 		0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
    587 	fs->e2fs_fmod = 1;
    588 	bdwrite(bp);
    589 	return 0;
    590 }
    591 
    592 /*
    593  * Find a block in the specified cylinder group.
    594  *
    595  * It is a panic if a request is made to find a block if none are
    596  * available.
    597  */
    598 
    599 static daddr_t
    600 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
    601 {
    602 	int start, len, loc, i, map;
    603 
    604 	/*
    605 	 * find the fragment by searching through the free block
    606 	 * map for an appropriate bit pattern
    607 	 */
    608 	if (bpref)
    609 		start = dtogd(fs, bpref) / NBBY;
    610 	else
    611 		start = 0;
    612 	len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
    613 	loc = skpc(0xff, len, &bbp[start]);
    614 	if (loc == 0) {
    615 		len = start + 1;
    616 		start = 0;
    617 		loc = skpc(0xff, len, &bbp[start]);
    618 		if (loc == 0) {
    619 			printf("start = %d, len = %d, fs = %s\n",
    620 				start, len, fs->e2fs_fsmnt);
    621 			panic("ext2fs_alloccg: map corrupted");
    622 			/* NOTREACHED */
    623 		}
    624 	}
    625 	i = start + len - loc;
    626 	map = bbp[i] ^ 0xff;
    627 	if (map == 0) {
    628 		printf("fs = %s\n", fs->e2fs_fsmnt);
    629 		panic("ext2fs_mapsearch: block not in map");
    630 	}
    631 	return i * NBBY + ffs(map) - 1;
    632 }
    633 
    634 /*
    635  * Fserr prints the name of a file system with an error diagnostic.
    636  *
    637  * The form of the error message is:
    638  *	fs: error message
    639  */
    640 static void
    641 ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
    642 {
    643 
    644 	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
    645 }
    646 
    647 static __inline void
    648 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
    649 {
    650 	/* XXX disk32 */
    651 	if (nifree) {
    652 		gd->ext2bgd_nifree = h2fs16(fs2h16(gd->ext2bgd_nifree) + nifree);
    653 		/*
    654 		 * If we allocated inode on bigger offset than what was
    655 		 * ever used before, bump the itable_unused count. This
    656 		 * member only ever grows, and is used only for initialization
    657 		 * !INODE_ZEROED groups with used inodes. Of course, by the
    658 		 * time we get here the itables are already zeroed, but
    659 		 * e2fstools fsck.ext4 still checks this.
    660 		 */
    661 		if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 && (ioff+1) >= (fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo))) {
    662 			gd->ext2bgd_itable_unused_lo = h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
    663 		}
    664 
    665 		KASSERT(!E2FS_HAS_GD_CSUM(fs) || gd->ext2bgd_itable_unused_lo <= gd->ext2bgd_nifree);
    666 	}
    667 
    668 
    669 	if (nbfree)
    670 		gd->ext2bgd_nbfree = h2fs16(fs2h16(gd->ext2bgd_nbfree) + nbfree);
    671 
    672 	if (ndirs)
    673 		gd->ext2bgd_ndirs = h2fs16(fs2h16(gd->ext2bgd_ndirs) + ndirs);
    674 
    675 	if (E2FS_HAS_GD_CSUM(fs))
    676 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
    677 }
    678 
    679 /*
    680  * Compute group description csum. Structure data must be LE (not host).
    681  * Returned as LE (disk encoding).
    682  */
    683 static uint16_t
    684 ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
    685 {
    686 	uint16_t crc;
    687 	uint32_t cg_bswapped = h2fs32((uint32_t)cg);
    688 	size_t off;
    689 
    690 	if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
    691 		return 0;
    692 
    693 	off = offsetof(struct ext2_gd, ext2bgd_checksum);
    694 
    695 	crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid, sizeof(fs->e2fs.e2fs_uuid));
    696 	crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
    697 	crc = crc16(crc, (uint8_t *)gd, off);
    698 	/* XXX ondisk32 */
    699 
    700 	return h2fs16(crc);
    701 }
    702 
    703 static void
    704 ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
    705 {
    706 	int i;
    707 
    708 	memset(bbp, 0, fs->e2fs_bsize);
    709 
    710 	/*
    711 	 * No block was ever allocated on this cg before, so the only used
    712 	 * blocks are metadata blocks on start of the group. We could optimize
    713 	 * this to set by bytes, but since this is done once per the group
    714 	 * in lifetime of filesystem, it really is not worth it.
    715 	 */
    716 	for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
    717 		setbit(bbp, i);
    718 }
    719 
    720 /*
    721  * Verify csum and initialize itable if not done already
    722  */
    723 int
    724 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
    725 {
    726 	/* XXX disk32 */
    727 	struct ext2_gd *gd;
    728 	ino_t ioff;
    729 	size_t boff;
    730 	struct buf *bp;
    731 	int cg, i, error;
    732 
    733 	if (!E2FS_HAS_GD_CSUM(fs))
    734 		return 0;
    735 
    736 	for(cg=0; cg < fs->e2fs_ncg; cg++) {
    737 		gd = &fs->e2fs_gd[cg];
    738 
    739 		/* Verify checksum */
    740 		if (gd->ext2bgd_checksum != ext2fs_cg_get_csum(fs, cg, gd)) {
    741 			printf("ext2fs_cg_verify_and_initialize: group %d invalid csum\n", cg);
    742 			return EINVAL;
    743 		}
    744 
    745 		/* if mounting read-write, zero itable if not already done */
    746 		if (ronly || (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
    747 			continue;
    748 
    749 		/*
    750 		 * We are skipping already used inodes, zero rest of itable
    751 		 * blocks. First block to zero could be only partial wipe, all
    752 		 * others are wiped completely. This might take a while,
    753 		 * there could be many inode table blocks. We use
    754 		 * delayed writes, so this shouldn't block for very
    755 		 * long.
    756 		 */
    757 		ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
    758 		boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
    759 
    760 		for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
    761 			if (boff) {
    762 				/* partial wipe, must read old data */
    763 				error = bread(devvp,
    764 					EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
    765 					(int)fs->e2fs_bsize, B_MODIFY, &bp);
    766 				if (error) {
    767 					printf("ext2fs_cg_verify_and_initialize: can't read itable block");
    768 					return error;
    769 				}
    770 				memset((char *)bp->b_data + boff, 0, fs->e2fs_bsize - boff);
    771 				boff = 0;
    772 			} else {
    773 				/*
    774 				 * Complete wipe, don't need to read data. This
    775 				 * assumes nothing else is changing the data.
    776 				 */
    777 				bp = getblk(devvp,
    778 					EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
    779 					(int)fs->e2fs_bsize, 0, 0);
    780 				clrbuf(bp);
    781 			}
    782 
    783 			bdwrite(bp);
    784 		}
    785 
    786 		gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
    787 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
    788 		fs->e2fs_fmod = 1;
    789 	}
    790 
    791 	return 0;
    792 }
    793