Home | History | Annotate | Line # | Download | only in ext2fs
ext2fs_alloc.c revision 1.49
      1 /*	$NetBSD: ext2fs_alloc.c,v 1.49 2016/08/20 19:47:44 jdolecek Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  *
     31  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
     32  *  Modified for ext2fs by Manuel Bouyer.
     33  */
     34 
     35 /*
     36  * Copyright (c) 1997 Manuel Bouyer.
     37  *
     38  * Redistribution and use in source and binary forms, with or without
     39  * modification, are permitted provided that the following conditions
     40  * are met:
     41  * 1. Redistributions of source code must retain the above copyright
     42  *    notice, this list of conditions and the following disclaimer.
     43  * 2. Redistributions in binary form must reproduce the above copyright
     44  *    notice, this list of conditions and the following disclaimer in the
     45  *    documentation and/or other materials provided with the distribution.
     46  *
     47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     57  *
     58  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
     59  *  Modified for ext2fs by Manuel Bouyer.
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.49 2016/08/20 19:47:44 jdolecek Exp $");
     64 
     65 #include <sys/param.h>
     66 #include <sys/systm.h>
     67 #include <sys/buf.h>
     68 #include <sys/proc.h>
     69 #include <sys/vnode.h>
     70 #include <sys/mount.h>
     71 #include <sys/kernel.h>
     72 #include <sys/syslog.h>
     73 #include <sys/kauth.h>
     74 
     75 #include <lib/libkern/crc16.h>
     76 
     77 #include <ufs/ufs/inode.h>
     78 #include <ufs/ufs/ufs_extern.h>
     79 #include <ufs/ufs/ufsmount.h>
     80 
     81 #include <ufs/ext2fs/ext2fs.h>
     82 #include <ufs/ext2fs/ext2fs_extern.h>
     83 
     84 u_long ext2gennumber;
     85 
     86 static daddr_t	ext2fs_alloccg(struct inode *, int, daddr_t, int);
     87 static u_long	ext2fs_dirpref(struct m_ext2fs *);
     88 static void	ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
     89 static u_long	ext2fs_hashalloc(struct inode *, int, long, int,
     90 		    daddr_t (*)(struct inode *, int, daddr_t, int));
     91 static daddr_t	ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
     92 static daddr_t	ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
     93 static __inline void	ext2fs_cg_update(struct m_ext2fs *, int, struct ext2_gd *, int, int, int, daddr_t);
     94 static uint16_t 	ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
     95 static void		ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *, char *);
     96 
     97 /*
     98  * Allocate a block in the file system.
     99  *
    100  * A preference may be optionally specified. If a preference is given
    101  * the following hierarchy is used to allocate a block:
    102  *   1) allocate the requested block.
    103  *   2) allocate a rotationally optimal block in the same cylinder.
    104  *   3) allocate a block in the same cylinder group.
    105  *   4) quadradically rehash into other cylinder groups, until an
    106  *	  available block is located.
    107  * If no block preference is given the following hierarchy is used
    108  * to allocate a block:
    109  *   1) allocate a block in the cylinder group that contains the
    110  *	  inode for the file.
    111  *   2) quadradically rehash into other cylinder groups, until an
    112  *	  available block is located.
    113  */
    114 int
    115 ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
    116     kauth_cred_t cred, daddr_t *bnp)
    117 {
    118 	struct m_ext2fs *fs;
    119 	daddr_t bno;
    120 	int cg;
    121 
    122 	*bnp = 0;
    123 	fs = ip->i_e2fs;
    124 #ifdef DIAGNOSTIC
    125 	if (cred == NOCRED)
    126 		panic("ext2fs_alloc: missing credential");
    127 #endif /* DIAGNOSTIC */
    128 	if (fs->e2fs.e2fs_fbcount == 0)
    129 		goto nospace;
    130 	if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
    131 	    NULL, NULL) != 0 &&
    132 	    freespace(fs) <= 0)
    133 		goto nospace;
    134 	if (bpref >= fs->e2fs.e2fs_bcount)
    135 		bpref = 0;
    136 	if (bpref == 0)
    137 		cg = ino_to_cg(fs, ip->i_number);
    138 	else
    139 		cg = dtog(fs, bpref);
    140 	bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
    141 	    ext2fs_alloccg);
    142 	if (bno > 0) {
    143 		ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
    144 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    145 		*bnp = bno;
    146 		return 0;
    147 	}
    148 nospace:
    149 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
    150 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
    151 	return ENOSPC;
    152 }
    153 
    154 /*
    155  * Allocate an inode in the file system.
    156  *
    157  * If allocating a directory, use ext2fs_dirpref to select the inode.
    158  * If allocating in a directory, the following hierarchy is followed:
    159  *   1) allocate the preferred inode.
    160  *   2) allocate an inode in the same cylinder group.
    161  *   3) quadradically rehash into other cylinder groups, until an
    162  *	  available inode is located.
    163  * If no inode preference is given the following hierarchy is used
    164  * to allocate an inode:
    165  *   1) allocate an inode in cylinder group 0.
    166  *   2) quadradically rehash into other cylinder groups, until an
    167  *	  available inode is located.
    168  */
    169 int
    170 ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
    171     struct vnode **vpp)
    172 {
    173 	struct inode *pip;
    174 	struct m_ext2fs *fs;
    175 	struct inode *ip;
    176 	ino_t ino, ipref;
    177 	int cg, error;
    178 
    179 	*vpp = NULL;
    180 	pip = VTOI(pvp);
    181 	fs = pip->i_e2fs;
    182 	if (fs->e2fs.e2fs_ficount == 0)
    183 		goto noinodes;
    184 
    185 	if ((mode & IFMT) == IFDIR)
    186 		cg = ext2fs_dirpref(fs);
    187 	else
    188 		cg = ino_to_cg(fs, pip->i_number);
    189 	ipref = cg * fs->e2fs.e2fs_ipg + 1;
    190 	ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
    191 	if (ino == 0)
    192 		goto noinodes;
    193 	error = VFS_VGET(pvp->v_mount, ino, vpp);
    194 	if (error) {
    195 		ext2fs_vfree(pvp, ino, mode);
    196 		return error;
    197 	}
    198 	ip = VTOI(*vpp);
    199 
    200 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[ino_to_cg(fs, ino)].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
    201 
    202 	/* check for already used inode; makes sense only for ZEROED itable */
    203 	if (__predict_false(ip->i_e2fs_mode && ip->i_e2fs_nlink != 0)) {
    204 		printf("mode = 0%o, nlinks %d, inum = %llu, fs = %s\n",
    205 		    ip->i_e2fs_mode, ip->i_e2fs_nlink,
    206 		    (unsigned long long)ip->i_number, fs->e2fs_fsmnt);
    207 		panic("ext2fs_valloc: dup alloc");
    208 	}
    209 
    210 	memset(ip->i_din.e2fs_din, 0, EXT2_DINODE_SIZE(fs));
    211 
    212 	/*
    213 	 * Set up a new generation number for this inode.
    214 	 */
    215 	if (++ext2gennumber < time_second)
    216 		ext2gennumber = time_second;
    217 	ip->i_e2fs_gen = ext2gennumber;
    218 	return 0;
    219 noinodes:
    220 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
    221 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
    222 	return ENOSPC;
    223 }
    224 
    225 /*
    226  * Find a cylinder to place a directory.
    227  *
    228  * The policy implemented by this algorithm is to select from
    229  * among those cylinder groups with above the average number of
    230  * free inodes, the one with the smallest number of directories.
    231  */
    232 static u_long
    233 ext2fs_dirpref(struct m_ext2fs *fs)
    234 {
    235 	int cg, maxspace, mincg, avgifree;
    236 
    237 	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
    238 	maxspace = 0;
    239 	mincg = -1;
    240 	for (cg = 0; cg < fs->e2fs_ncg; cg++)
    241 		if (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) >= avgifree) {
    242 			if (mincg == -1 || fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree) > maxspace) {
    243 				mincg = cg;
    244 				maxspace = fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
    245 			}
    246 		}
    247 	return mincg;
    248 }
    249 
    250 /*
    251  * Select the desired position for the next block in a file.  The file is
    252  * logically divided into sections. The first section is composed of the
    253  * direct blocks. Each additional section contains fs_maxbpg blocks.
    254  *
    255  * If no blocks have been allocated in the first section, the policy is to
    256  * request a block in the same cylinder group as the inode that describes
    257  * the file. Otherwise, the policy is to try to allocate the blocks
    258  * contigously. The two fields of the ext2 inode extension (see
    259  * ufs/ufs/inode.h) help this.
    260  */
    261 daddr_t
    262 ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
    263 		int32_t *bap /* XXX ondisk32 */)
    264 {
    265 	struct m_ext2fs *fs;
    266 	int cg, i;
    267 
    268 	fs = ip->i_e2fs;
    269 	/*
    270 	 * if we are doing contigous lbn allocation, try to alloc blocks
    271 	 * contigously on disk
    272 	 */
    273 
    274 	if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
    275 		return ip->i_e2fs_last_blk + 1;
    276 	}
    277 
    278 	/*
    279 	 * bap, if provided, gives us a list of blocks to which we want to
    280 	 * stay close
    281 	 */
    282 
    283 	if (bap) {
    284 		for (i = indx; i >= 0 ; i--) {
    285 			if (bap[i]) {
    286 				return fs2h32(bap[i]) + 1;
    287 			}
    288 		}
    289 	}
    290 
    291 	/* fall back to the first block of the cylinder containing the inode */
    292 
    293 	cg = ino_to_cg(fs, ip->i_number);
    294 	return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
    295 }
    296 
    297 /*
    298  * Implement the cylinder overflow algorithm.
    299  *
    300  * The policy implemented by this algorithm is:
    301  *   1) allocate the block in its requested cylinder group.
    302  *   2) quadradically rehash on the cylinder group number.
    303  *   3) brute force search for a free block.
    304  */
    305 static u_long
    306 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
    307 		daddr_t (*allocator)(struct inode *, int, daddr_t, int))
    308 {
    309 	struct m_ext2fs *fs;
    310 	long result;
    311 	int i, icg = cg;
    312 
    313 	fs = ip->i_e2fs;
    314 	/*
    315 	 * 1: preferred cylinder group
    316 	 */
    317 	result = (*allocator)(ip, cg, pref, size);
    318 	if (result)
    319 		return result;
    320 	/*
    321 	 * 2: quadratic rehash
    322 	 */
    323 	for (i = 1; i < fs->e2fs_ncg; i *= 2) {
    324 		cg += i;
    325 		if (cg >= fs->e2fs_ncg)
    326 			cg -= fs->e2fs_ncg;
    327 		result = (*allocator)(ip, cg, 0, size);
    328 		if (result)
    329 			return result;
    330 	}
    331 	/*
    332 	 * 3: brute force search
    333 	 * Note that we start at i == 2, since 0 was checked initially,
    334 	 * and 1 is always checked in the quadratic rehash.
    335 	 */
    336 	cg = (icg + 2) % fs->e2fs_ncg;
    337 	for (i = 2; i < fs->e2fs_ncg; i++) {
    338 		result = (*allocator)(ip, cg, 0, size);
    339 		if (result)
    340 			return result;
    341 		cg++;
    342 		if (cg == fs->e2fs_ncg)
    343 			cg = 0;
    344 	}
    345 	return 0;
    346 }
    347 
    348 /*
    349  * Determine whether a block can be allocated.
    350  *
    351  * Check to see if a block of the appropriate size is available,
    352  * and if it is, allocate it.
    353  */
    354 
    355 static daddr_t
    356 ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
    357 {
    358 	struct m_ext2fs *fs;
    359 	char *bbp;
    360 	struct buf *bp;
    361 	/* XXX ondisk32 */
    362 	int error, bno, start, end, loc;
    363 
    364 	fs = ip->i_e2fs;
    365 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
    366 		return 0;
    367 	error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
    368 		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
    369 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    370 	if (error) {
    371 		return 0;
    372 	}
    373 	bbp = (char *)bp->b_data;
    374 
    375 	if (dtog(fs, bpref) != cg)
    376 		bpref = 0;
    377 
    378 	/* initialize block bitmap now if uninit */
    379 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
    380 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
    381 		ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
    382 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
    383 	}
    384 
    385 	if (bpref != 0) {
    386 		bpref = dtogd(fs, bpref);
    387 		/*
    388 		 * if the requested block is available, use it
    389 		 */
    390 		if (isclr(bbp, bpref)) {
    391 			bno = bpref;
    392 			goto gotit;
    393 		}
    394 	}
    395 	/*
    396 	 * no blocks in the requested cylinder, so take next
    397 	 * available one in this cylinder group.
    398 	 * first try to get 8 contigous blocks, then fall back to a single
    399 	 * block.
    400 	 */
    401 	if (bpref)
    402 		start = dtogd(fs, bpref) / NBBY;
    403 	else
    404 		start = 0;
    405 	end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
    406 	for (loc = start; loc < end; loc++) {
    407 		if (bbp[loc] == 0) {
    408 			bno = loc * NBBY;
    409 			goto gotit;
    410 		}
    411 	}
    412 	for (loc = 0; loc < start; loc++) {
    413 		if (bbp[loc] == 0) {
    414 			bno = loc * NBBY;
    415 			goto gotit;
    416 		}
    417 	}
    418 
    419 	bno = ext2fs_mapsearch(fs, bbp, bpref);
    420 	if (bno < 0)
    421 		return 0;
    422 gotit:
    423 #ifdef DIAGNOSTIC
    424 	if (isset(bbp, (daddr_t)bno)) {
    425 		printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
    426 			cg, bno, fs->e2fs_fsmnt);
    427 		panic("ext2fs_alloccg: dup alloc");
    428 	}
    429 #endif
    430 	setbit(bbp, (daddr_t)bno);
    431 	fs->e2fs.e2fs_fbcount--;
    432 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
    433 	fs->e2fs_fmod = 1;
    434 	bdwrite(bp);
    435 	return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
    436 }
    437 
    438 /*
    439  * Determine whether an inode can be allocated.
    440  *
    441  * Check to see if an inode is available, and if it is,
    442  * allocate it using the following policy:
    443  *   1) allocate the requested inode.
    444  *   2) allocate the next available inode after the requested
    445  *	  inode in the specified cylinder group.
    446  */
    447 static daddr_t
    448 ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
    449 {
    450 	struct m_ext2fs *fs;
    451 	char *ibp;
    452 	struct buf *bp;
    453 	int error, start, len, loc, map, i;
    454 
    455 	ipref--; /* to avoid a lot of (ipref -1) */
    456 	if (ipref == -1)
    457 		ipref = 0;
    458 	fs = ip->i_e2fs;
    459 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
    460 		return 0;
    461 	error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
    462 		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
    463 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    464 	if (error) {
    465 		return 0;
    466 	}
    467 	ibp = (char *)bp->b_data;
    468 
    469 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
    470 
    471 	/* initialize inode bitmap now if uninit */
    472 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
    473 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
    474 		KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
    475 		memset(ibp, 0, fs->e2fs_bsize);
    476 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
    477 	}
    478 
    479 	if (ipref) {
    480 		ipref %= fs->e2fs.e2fs_ipg;
    481 		if (isclr(ibp, ipref))
    482 			goto gotit;
    483 	}
    484 	start = ipref / NBBY;
    485 	len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
    486 	loc = skpc(0xff, len, &ibp[start]);
    487 	if (loc == 0) {
    488 		len = start + 1;
    489 		start = 0;
    490 		loc = skpc(0xff, len, &ibp[0]);
    491 		if (loc == 0) {
    492 			printf("cg = %d, ipref = %lld, fs = %s\n",
    493 				cg, (long long)ipref, fs->e2fs_fsmnt);
    494 			panic("ext2fs_nodealloccg: map corrupted");
    495 			/* NOTREACHED */
    496 		}
    497 	}
    498 	i = start + len - loc;
    499 	map = ibp[i] ^ 0xff;
    500 	if (map == 0) {
    501 		printf("fs = %s\n", fs->e2fs_fsmnt);
    502 		panic("ext2fs_nodealloccg: inode not in map");
    503 	}
    504 	ipref = i * NBBY + ffs(map) - 1;
    505 gotit:
    506 	setbit(ibp, ipref);
    507 	fs->e2fs.e2fs_ficount--;
    508 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
    509 		0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
    510 	fs->e2fs_fmod = 1;
    511 	bdwrite(bp);
    512 	return cg * fs->e2fs.e2fs_ipg + ipref + 1;
    513 }
    514 
    515 /*
    516  * Free a block.
    517  *
    518  * The specified block is placed back in the
    519  * free map.
    520  */
    521 void
    522 ext2fs_blkfree(struct inode *ip, daddr_t bno)
    523 {
    524 	struct m_ext2fs *fs;
    525 	char *bbp;
    526 	struct buf *bp;
    527 	int error, cg;
    528 
    529 	fs = ip->i_e2fs;
    530 	cg = dtog(fs, bno);
    531 
    532 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
    533 
    534 	if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
    535 		printf("bad block %lld, ino %llu\n", (long long)bno,
    536 		    (unsigned long long)ip->i_number);
    537 		ext2fs_fserr(fs, ip->i_uid, "bad block");
    538 		return;
    539 	}
    540 	error = bread(ip->i_devvp,
    541 		EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
    542 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    543 	if (error) {
    544 		return;
    545 	}
    546 	bbp = (char *)bp->b_data;
    547 	bno = dtogd(fs, bno);
    548 	if (isclr(bbp, bno)) {
    549 		printf("dev = 0x%llx, block = %lld, fs = %s\n",
    550 		    (unsigned long long)ip->i_dev, (long long)bno,
    551 		    fs->e2fs_fsmnt);
    552 		panic("blkfree: freeing free block");
    553 	}
    554 	clrbit(bbp, bno);
    555 	fs->e2fs.e2fs_fbcount++;
    556 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
    557 	fs->e2fs_fmod = 1;
    558 	bdwrite(bp);
    559 }
    560 
    561 /*
    562  * Free an inode.
    563  *
    564  * The specified inode is placed back in the free map.
    565  */
    566 int
    567 ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
    568 {
    569 	struct m_ext2fs *fs;
    570 	char *ibp;
    571 	struct inode *pip;
    572 	struct buf *bp;
    573 	int error, cg;
    574 
    575 	pip = VTOI(pvp);
    576 	fs = pip->i_e2fs;
    577 
    578 	if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
    579 		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
    580 		    (unsigned long long)pip->i_dev, (unsigned long long)ino,
    581 		    fs->e2fs_fsmnt);
    582 
    583 	cg = ino_to_cg(fs, ino);
    584 
    585 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
    586 
    587 	error = bread(pip->i_devvp,
    588 		EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
    589 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
    590 	if (error) {
    591 		return 0;
    592 	}
    593 	ibp = (char *)bp->b_data;
    594 	ino = (ino - 1) % fs->e2fs.e2fs_ipg;
    595 	if (isclr(ibp, ino)) {
    596 		printf("dev = 0x%llx, ino = %llu, fs = %s\n",
    597 		    (unsigned long long)pip->i_dev,
    598 		    (unsigned long long)ino, fs->e2fs_fsmnt);
    599 		if (fs->e2fs_ronly == 0)
    600 			panic("ifree: freeing free inode");
    601 	}
    602 	clrbit(ibp, ino);
    603 	fs->e2fs.e2fs_ficount++;
    604 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
    605 		0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
    606 	fs->e2fs_fmod = 1;
    607 	bdwrite(bp);
    608 	return 0;
    609 }
    610 
    611 /*
    612  * Find a block in the specified cylinder group.
    613  *
    614  * It is a panic if a request is made to find a block if none are
    615  * available.
    616  */
    617 
    618 static daddr_t
    619 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
    620 {
    621 	int start, len, loc, i, map;
    622 
    623 	/*
    624 	 * find the fragment by searching through the free block
    625 	 * map for an appropriate bit pattern
    626 	 */
    627 	if (bpref)
    628 		start = dtogd(fs, bpref) / NBBY;
    629 	else
    630 		start = 0;
    631 	len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
    632 	loc = skpc(0xff, len, &bbp[start]);
    633 	if (loc == 0) {
    634 		len = start + 1;
    635 		start = 0;
    636 		loc = skpc(0xff, len, &bbp[start]);
    637 		if (loc == 0) {
    638 			printf("start = %d, len = %d, fs = %s\n",
    639 				start, len, fs->e2fs_fsmnt);
    640 			panic("ext2fs_alloccg: map corrupted");
    641 			/* NOTREACHED */
    642 		}
    643 	}
    644 	i = start + len - loc;
    645 	map = bbp[i] ^ 0xff;
    646 	if (map == 0) {
    647 		printf("fs = %s\n", fs->e2fs_fsmnt);
    648 		panic("ext2fs_mapsearch: block not in map");
    649 	}
    650 	return i * NBBY + ffs(map) - 1;
    651 }
    652 
    653 /*
    654  * Fserr prints the name of a file system with an error diagnostic.
    655  *
    656  * The form of the error message is:
    657  *	fs: error message
    658  */
    659 static void
    660 ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
    661 {
    662 
    663 	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
    664 }
    665 
    666 static __inline void
    667 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
    668 {
    669 	/* XXX disk32 */
    670 	if (nifree) {
    671 		gd->ext2bgd_nifree = h2fs16(fs2h16(gd->ext2bgd_nifree) + nifree);
    672 		/*
    673 		 * If we allocated inode on bigger offset than what was
    674 		 * ever used before, bump the itable_unused count. This
    675 		 * member only ever grows, and is used only for initialization
    676 		 * !INODE_ZEROED groups with used inodes. Of course, by the
    677 		 * time we get here the itables are already zeroed, but
    678 		 * e2fstools fsck.ext4 still checks this.
    679 		 */
    680 		if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 && (ioff+1) >= (fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo))) {
    681 			gd->ext2bgd_itable_unused_lo = h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
    682 		}
    683 
    684 		KASSERT(!E2FS_HAS_GD_CSUM(fs) || gd->ext2bgd_itable_unused_lo <= gd->ext2bgd_nifree);
    685 	}
    686 
    687 
    688 	if (nbfree)
    689 		gd->ext2bgd_nbfree = h2fs16(fs2h16(gd->ext2bgd_nbfree) + nbfree);
    690 
    691 	if (ndirs)
    692 		gd->ext2bgd_ndirs = h2fs16(fs2h16(gd->ext2bgd_ndirs) + ndirs);
    693 
    694 	if (E2FS_HAS_GD_CSUM(fs))
    695 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
    696 }
    697 
    698 /*
    699  * Compute group description csum. Structure data must be LE (not host).
    700  * Returned as LE (disk encoding).
    701  */
    702 static uint16_t
    703 ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
    704 {
    705 	uint16_t crc;
    706 	uint32_t cg_bswapped = h2fs32((uint32_t)cg);
    707 	size_t off;
    708 
    709 	if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
    710 		return 0;
    711 
    712 	off = offsetof(struct ext2_gd, ext2bgd_checksum);
    713 
    714 	crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid, sizeof(fs->e2fs.e2fs_uuid));
    715 	crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
    716 	crc = crc16(crc, (uint8_t *)gd, off);
    717 	/* XXX ondisk32 */
    718 
    719 	return h2fs16(crc);
    720 }
    721 
    722 static void
    723 ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
    724 {
    725 	int i;
    726 
    727 	memset(bbp, 0, fs->e2fs_bsize);
    728 
    729 	/*
    730 	 * No block was ever allocated on this cg before, so the only used
    731 	 * blocks are metadata blocks on start of the group. We could optimize
    732 	 * this to set by bytes, but since this is done once per the group
    733 	 * in lifetime of filesystem, it really is not worth it.
    734 	 */
    735 	for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
    736 		setbit(bbp, i);
    737 }
    738 
    739 /*
    740  * Verify csum and initialize itable if not done already
    741  */
    742 int
    743 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
    744 {
    745 	/* XXX disk32 */
    746 	struct ext2_gd *gd;
    747 	ino_t ioff;
    748 	size_t boff;
    749 	struct buf *bp;
    750 	int cg, i, error;
    751 
    752 	if (!E2FS_HAS_GD_CSUM(fs))
    753 		return 0;
    754 
    755 	for(cg=0; cg < fs->e2fs_ncg; cg++) {
    756 		gd = &fs->e2fs_gd[cg];
    757 
    758 		/* Verify checksum */
    759 		if (gd->ext2bgd_checksum != ext2fs_cg_get_csum(fs, cg, gd)) {
    760 			printf("ext2fs_cg_verify_and_initialize: group %d invalid csum\n", cg);
    761 			return EINVAL;
    762 		}
    763 
    764 		/* if mounting read-write, zero itable if not already done */
    765 		if (ronly || (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
    766 			continue;
    767 
    768 		/*
    769 		 * We are skipping already used inodes, zero rest of itable
    770 		 * blocks. First block to zero could be only partial wipe, all
    771 		 * others are wiped completely. This might take a while,
    772 		 * there could be many inode table blocks. We use
    773 		 * delayed writes, so this shouldn't block for very
    774 		 * long.
    775 		 */
    776 		ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
    777 		boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
    778 
    779 		for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
    780 			if (boff) {
    781 				/* partial wipe, must read old data */
    782 				error = bread(devvp,
    783 					EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
    784 					(int)fs->e2fs_bsize, B_MODIFY, &bp);
    785 				if (error) {
    786 					printf("ext2fs_cg_verify_and_initialize: can't read itable block");
    787 					return error;
    788 				}
    789 				memset((char *)bp->b_data + boff, 0, fs->e2fs_bsize - boff);
    790 				boff = 0;
    791 			} else {
    792 				/*
    793 				 * Complete wipe, don't need to read data. This
    794 				 * assumes nothing else is changing the data.
    795 				 */
    796 				bp = getblk(devvp,
    797 					EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
    798 					(int)fs->e2fs_bsize, 0, 0);
    799 				clrbuf(bp);
    800 			}
    801 
    802 			bdwrite(bp);
    803 		}
    804 
    805 		gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
    806 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
    807 		fs->e2fs_fmod = 1;
    808 	}
    809 
    810 	return 0;
    811 }
    812