/* NetBSD src/sys/ufs/ffs/ffs_balloc.c, revision 1.63.4.1 */
      1 /*	$NetBSD: ffs_balloc.c,v 1.63.4.1 2020/04/21 18:42:45 martin Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.63.4.1 2020/04/21 18:42:45 martin Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/kauth.h>
     57 #include <sys/fstrans.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     71     struct buf **);
     72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     73     struct buf **);
     74 
     75 static daddr_t
     76 ffs_extb(struct fs *fs, struct ufs2_dinode *dp, daddr_t nb)
     77 {
     78 	return ufs_rw64(dp->di_extb[nb], UFS_FSNEEDSWAP(fs));
     79 }
     80 
     81 /*
     82  * Balloc defines the structure of file system storage
     83  * by allocating the physical blocks on a device given
     84  * the inode and the logical block number in a file.
     85  */
     86 
     87 int
     88 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     89     struct buf **bpp)
     90 {
     91 	int error;
     92 
     93 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     94 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     95 	else
     96 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     97 
     98 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
     99 		brelse(*bpp, 0);
    100 
    101 	return error;
    102 }
    103 
/*
 * UFS1 version of ffs_balloc().
 *
 * Allocate (or extend) the block underlying byte offset "off" of "vp":
 *   1. if the file currently ends in a fragment short of "off", grow
 *      that fragment to a full block first;
 *   2. direct blocks (lbn < UFS_NDADDR) are allocated/reallocated in
 *      place in i_ffs1_db[];
 *   3. otherwise walk the indirect chain (ufs_getlbns), allocating any
 *      missing indirect blocks synchronously so on-disk indirect
 *      blocks never point at garbage, then allocate the data block.
 *
 * If bpp != NULL, *bpp returns a buffer for the data block on success
 * (with B_METAONLY, the final indirect block instead).  On failure the
 * "fail:" path unwinds: it invalidates/flushes buffers created here,
 * clears the pointer that linked in the partial chain, and frees every
 * block recorded in allociblk[].
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk[] records every block allocated here, for unwinding. */
	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = ffs_lblkno(fs, off);
	/* Fold the in-block offset into the request; must fit one block. */
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
	    lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * um_lock is entered here and apparently consumed
			 * by ffs_realloccg() (no mutex_exit on this path).
			 */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, nb),
				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, flags, cred, bpp,
				    &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
					      B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
						      B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_getdb(fs, ip, lbn),
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, flags, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		/* Link the (possibly new) block into the inode. */
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* At the last level, "bp" holds the final indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				/* Disk address of this block's cylinder group. */
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate any cached buffers for the now-freed chain. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
    521 
    522 static int
    523 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    524     int flags, struct buf **bpp)
    525 {
    526 	daddr_t lbn, lastlbn;
    527 	struct buf *bp, *nbp;
    528 	struct inode *ip = VTOI(vp);
    529 	struct fs *fs = ip->i_fs;
    530 	struct ufsmount *ump = ip->i_ump;
    531 	struct indir indirs[UFS_NIADDR + 2];
    532 	daddr_t newb, pref, nb;
    533 	int64_t *bap;
    534 	int deallocated, osize, nsize, num, i, error;
    535 	daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
    536 	int64_t *allocib;
    537 	int unwindidx = -1;
    538 	const int needswap = UFS_FSNEEDSWAP(fs);
    539 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    540 
    541 	lbn = ffs_lblkno(fs, off);
    542 	size = ffs_blkoff(fs, off) + size;
    543 	if (size > fs->fs_bsize)
    544 		panic("ffs_balloc: blk too big");
    545 	if (bpp != NULL) {
    546 		*bpp = NULL;
    547 	}
    548 	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
    549 	    lbn, size, 0);
    550 
    551 	if (lbn < 0)
    552 		return (EFBIG);
    553 
    554 	/*
    555 	 * Check for allocating external data.
    556 	 */
    557 	if (flags & IO_EXT) {
    558 		struct ufs2_dinode *dp = ip->i_din.ffs2_din;
    559 		if (lbn >= UFS_NXADDR)
    560 			return (EFBIG);
    561 		/*
    562 		 * If the next write will extend the data into a new block,
    563 		 * and the data is currently composed of a fragment
    564 		 * this fragment has to be extended to be a full block.
    565 		 */
    566 		lastlbn = ffs_lblkno(fs, dp->di_extsize);
    567 		if (lastlbn < lbn) {
    568 			nb = lastlbn;
    569 			osize = ffs_sblksize(fs, dp->di_extsize, nb);
    570 			if (osize < fs->fs_bsize && osize > 0) {
    571 				mutex_enter(&ump->um_lock);
    572 				error = ffs_realloccg(ip, -1 - nb,
    573 				    ffs_extb(fs, dp, nb),
    574 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    575 					flags, &dp->di_extb[0]),
    576 				    osize, (int)fs->fs_bsize, flags, cred,
    577 				    &bp, &newb);
    578 				if (error)
    579 					return (error);
    580 				dp->di_extsize = ffs_lblktosize(fs, nb + 1);
    581 				dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
    582 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    583 				if (flags & IO_SYNC)
    584 					bwrite(bp);
    585 				else
    586 					bawrite(bp);
    587 			}
    588 		}
    589 		/*
    590 		 * All blocks are direct blocks
    591 		 */
    592 		nb = dp->di_extb[lbn];
    593 		if (nb != 0 && dp->di_extsize >= ffs_lblktosize(fs, lbn + 1)) {
    594 			error = bread(vp, -1 - lbn, fs->fs_bsize,
    595 			    0, &bp);
    596 			if (error) {
    597 				return (error);
    598 			}
    599 			mutex_enter(bp->b_objlock);
    600 			bp->b_blkno = FFS_FSBTODB(fs, nb);
    601 			mutex_exit(bp->b_objlock);
    602 			*bpp = bp;
    603 			return (0);
    604 		}
    605 		if (nb != 0) {
    606 			/*
    607 			 * Consider need to reallocate a fragment.
    608 			 */
    609 			osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
    610 			nsize = ffs_fragroundup(fs, size);
    611 			if (nsize <= osize) {
    612 				error = bread(vp, -1 - lbn, osize,
    613 				    0, &bp);
    614 				if (error) {
    615 					return (error);
    616 				}
    617 				mutex_enter(bp->b_objlock);
    618 				bp->b_blkno = FFS_FSBTODB(fs, nb);
    619 				mutex_exit(bp->b_objlock);
    620 			} else {
    621 				mutex_enter(&ump->um_lock);
    622 				error = ffs_realloccg(ip, -1 - lbn,
    623 				    ffs_extb(fs, dp, lbn),
    624 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    625 				        &dp->di_extb[0]),
    626 				    osize, nsize, flags, cred, &bp, &newb);
    627 				if (error)
    628 					return (error);
    629 			}
    630 		} else {
    631 			if (dp->di_extsize < ffs_lblktosize(fs, lbn + 1))
    632 				nsize = ffs_fragroundup(fs, size);
    633 			else
    634 				nsize = fs->fs_bsize;
    635 			mutex_enter(&ump->um_lock);
    636 			error = ffs_alloc(ip, lbn,
    637 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    638 			       &dp->di_extb[0]),
    639 			   nsize, flags, cred, &newb);
    640 			if (error)
    641 				return (error);
    642 			error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
    643 			    nsize, (flags & B_CLRBUF) != 0, &bp);
    644 			if (error)
    645 				return error;
    646 		}
    647 		dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
    648 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    649 		*bpp = bp;
    650 		return (0);
    651 	}
    652 	/*
    653 	 * If the next write will extend the file into a new block,
    654 	 * and the file is currently composed of a fragment
    655 	 * this fragment has to be extended to be a full block.
    656 	 */
    657 
    658 	lastlbn = ffs_lblkno(fs, ip->i_size);
    659 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
    660 		nb = lastlbn;
    661 		osize = ffs_blksize(fs, ip, nb);
    662 		if (osize < fs->fs_bsize && osize > 0) {
    663 			mutex_enter(&ump->um_lock);
    664 			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, lbn),
    665 				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
    666 					&ip->i_ffs2_db[0]),
    667 				    osize, (int)fs->fs_bsize, flags, cred, bpp,
    668 				    &newb);
    669 			if (error)
    670 				return (error);
    671 			ip->i_size = ffs_lblktosize(fs, nb + 1);
    672 			ip->i_ffs2_size = ip->i_size;
    673 			uvm_vnp_setsize(vp, ip->i_size);
    674 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    675 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    676 			if (bpp) {
    677 				if (flags & B_SYNC)
    678 					bwrite(*bpp);
    679 				else
    680 					bawrite(*bpp);
    681 			}
    682 		}
    683 	}
    684 
    685 	/*
    686 	 * The first UFS_NDADDR blocks are direct blocks
    687 	 */
    688 
    689 	if (lbn < UFS_NDADDR) {
    690 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    691 		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
    692 
    693 			/*
    694 			 * The block is an already-allocated direct block
    695 			 * and the file already extends past this block,
    696 			 * thus this must be a whole block.
    697 			 * Just read the block (if requested).
    698 			 */
    699 
    700 			if (bpp != NULL) {
    701 				error = bread(vp, lbn, fs->fs_bsize,
    702 					      B_MODIFY, bpp);
    703 				if (error) {
    704 					return (error);
    705 				}
    706 			}
    707 			return (0);
    708 		}
    709 		if (nb != 0) {
    710 
    711 			/*
    712 			 * Consider need to reallocate a fragment.
    713 			 */
    714 
    715 			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
    716 			nsize = ffs_fragroundup(fs, size);
    717 			if (nsize <= osize) {
    718 
    719 				/*
    720 				 * The existing block is already
    721 				 * at least as big as we want.
    722 				 * Just read the block (if requested).
    723 				 */
    724 
    725 				if (bpp != NULL) {
    726 					error = bread(vp, lbn, osize,
    727 						      B_MODIFY, bpp);
    728 					if (error) {
    729 						return (error);
    730 					}
    731 				}
    732 				return 0;
    733 			} else {
    734 
    735 				/*
    736 				 * The existing block is smaller than we want,
    737 				 * grow it.
    738 				 */
    739 				mutex_enter(&ump->um_lock);
    740 				error = ffs_realloccg(ip, lbn,
    741 				    ffs_getdb(fs, ip, lbn),
    742 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    743 					&ip->i_ffs2_db[0]),
    744 				    osize, nsize, flags, cred, bpp, &newb);
    745 				if (error)
    746 					return (error);
    747 			}
    748 		} else {
    749 
    750 			/*
    751 			 * the block was not previously allocated,
    752 			 * allocate a new block or fragment.
    753 			 */
    754 
    755 			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
    756 				nsize = ffs_fragroundup(fs, size);
    757 			else
    758 				nsize = fs->fs_bsize;
    759 			mutex_enter(&ump->um_lock);
    760 			error = ffs_alloc(ip, lbn,
    761 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    762 				&ip->i_ffs2_db[0]),
    763 			    nsize, flags, cred, &newb);
    764 			if (error)
    765 				return (error);
    766 			if (bpp != NULL) {
    767 				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
    768 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    769 				if (error)
    770 					return error;
    771 			}
    772 		}
    773 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    774 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    775 		return (0);
    776 	}
    777 
    778 	/*
    779 	 * Determine the number of levels of indirection.
    780 	 */
    781 
    782 	pref = 0;
    783 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    784 		return (error);
    785 
    786 	/*
    787 	 * Fetch the first indirect block allocating if necessary.
    788 	 */
    789 
    790 	--num;
    791 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    792 	allocib = NULL;
    793 	allocblk = allociblk;
    794 	if (nb == 0) {
    795 		mutex_enter(&ump->um_lock);
    796 		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
    797 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    798 		    flags | B_METAONLY, cred, &newb);
    799 		if (error)
    800 			goto fail;
    801 		nb = newb;
    802 		*allocblk++ = nb;
    803 		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
    804 		    fs->fs_bsize, true, &bp);
    805 		if (error)
    806 			goto fail;
    807 		/*
    808 		 * Write synchronously so that indirect blocks
    809 		 * never point at garbage.
    810 		 */
    811 		if ((error = bwrite(bp)) != 0)
    812 			goto fail;
    813 		unwindidx = 0;
    814 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    815 		*allocib = ufs_rw64(nb, needswap);
    816 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    817 	}
    818 
    819 	/*
    820 	 * Fetch through the indirect blocks, allocating as necessary.
    821 	 */
    822 
    823 	for (i = 1;;) {
    824 		error = bread(vp,
    825 		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
    826 		if (error) {
    827 			goto fail;
    828 		}
    829 		bap = (int64_t *)bp->b_data;
    830 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    831 		if (i == num)
    832 			break;
    833 		i++;
    834 		if (nb != 0) {
    835 			brelse(bp, 0);
    836 			continue;
    837 		}
    838 		if (fscow_run(bp, true) != 0) {
    839 			brelse(bp, 0);
    840 			goto fail;
    841 		}
    842 		mutex_enter(&ump->um_lock);
    843 		/* Try to keep snapshot indirect blocks contiguous. */
    844 		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
    845 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
    846 			    flags | B_METAONLY, &bap[0]);
    847 		if (pref == 0)
    848 			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
    849 			    NULL);
    850 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    851 		    flags | B_METAONLY, cred, &newb);
    852 		if (error) {
    853 			brelse(bp, 0);
    854 			goto fail;
    855 		}
    856 		nb = newb;
    857 		*allocblk++ = nb;
    858 		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
    859 		    fs->fs_bsize, true, &nbp);
    860 		if (error) {
    861 			brelse(bp, 0);
    862 			goto fail;
    863 		}
    864 		/*
    865 		 * Write synchronously so that indirect blocks
    866 		 * never point at garbage.
    867 		 */
    868 		if ((error = bwrite(nbp)) != 0) {
    869 			brelse(bp, 0);
    870 			goto fail;
    871 		}
    872 		if (unwindidx < 0)
    873 			unwindidx = i - 1;
    874 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    875 
    876 		/*
    877 		 * If required, write synchronously, otherwise use
    878 		 * delayed write.
    879 		 */
    880 
    881 		if (flags & B_SYNC) {
    882 			bwrite(bp);
    883 		} else {
    884 			bdwrite(bp);
    885 		}
    886 	}
    887 
    888 	if (flags & B_METAONLY) {
    889 		KASSERT(bpp != NULL);
    890 		*bpp = bp;
    891 		return (0);
    892 	}
    893 
    894 	/*
    895 	 * Get the data block, allocating if necessary.
    896 	 */
    897 
    898 	if (nb == 0) {
    899 		if (fscow_run(bp, true) != 0) {
    900 			brelse(bp, 0);
    901 			goto fail;
    902 		}
    903 		mutex_enter(&ump->um_lock);
    904 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
    905 		    &bap[0]);
    906 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
    907 		    &newb);
    908 		if (error) {
    909 			brelse(bp, 0);
    910 			goto fail;
    911 		}
    912 		nb = newb;
    913 		*allocblk++ = nb;
    914 		if (bpp != NULL) {
    915 			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
    916 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    917 			if (error) {
    918 				brelse(bp, 0);
    919 				goto fail;
    920 			}
    921 		}
    922 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
    923 		if (allocib == NULL && unwindidx < 0) {
    924 			unwindidx = i - 1;
    925 		}
    926 
    927 		/*
    928 		 * If required, write synchronously, otherwise use
    929 		 * delayed write.
    930 		 */
    931 
    932 		if (flags & B_SYNC) {
    933 			bwrite(bp);
    934 		} else {
    935 			bdwrite(bp);
    936 		}
    937 		return (0);
    938 	}
    939 	brelse(bp, 0);
    940 	if (bpp != NULL) {
    941 		if (flags & B_CLRBUF) {
    942 			error = bread(vp, lbn, (int)fs->fs_bsize,
    943 			    B_MODIFY, &nbp);
    944 			if (error) {
    945 				goto fail;
    946 			}
    947 		} else {
    948 			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
    949 			    fs->fs_bsize, true, &nbp);
    950 			if (error)
    951 				goto fail;
    952 		}
    953 		*bpp = nbp;
    954 	}
    955 	return (0);
    956 
    957 fail:
    958 	/*
    959 	 * If we have failed part way through block allocation, we
    960 	 * have to deallocate any indirect blocks that we have allocated.
    961 	 */
    962 
    963 	if (unwindidx >= 0) {
    964 
    965 		/*
    966 		 * First write out any buffers we've created to resolve their
    967 		 * softdeps.  This must be done in reverse order of creation
    968 		 * so that we resolve the dependencies in one pass.
    969 		 * Write the cylinder group buffers for these buffers too.
    970 		 */
    971 
    972 		for (i = num; i >= unwindidx; i--) {
    973 			if (i == 0) {
    974 				break;
    975 			}
    976 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    977 			    fs->fs_bsize, false, &bp) != 0)
    978 				continue;
    979 			if (bp->b_oflags & BO_DELWRI) {
    980 				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
    981 				    FFS_DBTOFSB(fs, bp->b_blkno))));
    982 				bwrite(bp);
    983 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    984 				    fs->fs_cgsize, false, &bp) != 0)
    985 					continue;
    986 				if (bp->b_oflags & BO_DELWRI) {
    987 					bwrite(bp);
    988 				} else {
    989 					brelse(bp, BC_INVAL);
    990 				}
    991 			} else {
    992 				brelse(bp, BC_INVAL);
    993 			}
    994 		}
    995 
    996 		/*
    997 		 * Now that any dependencies that we created have been
    998 		 * resolved, we can undo the partial allocation.
    999 		 */
   1000 
   1001 		if (unwindidx == 0) {
   1002 			*allocib = 0;
   1003 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1004 		} else {
   1005 			int r;
   1006 
   1007 			r = bread(vp, indirs[unwindidx].in_lbn,
   1008 			    (int)fs->fs_bsize, 0, &bp);
   1009 			if (r) {
   1010 				panic("Could not unwind indirect block, error %d", r);
   1011 			} else {
   1012 				bap = (int64_t *)bp->b_data;
   1013 				bap[indirs[unwindidx].in_off] = 0;
   1014 				bwrite(bp);
   1015 			}
   1016 		}
   1017 		for (i = unwindidx + 1; i <= num; i++) {
   1018 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1019 			    fs->fs_bsize, false, &bp) == 0)
   1020 				brelse(bp, BC_INVAL);
   1021 		}
   1022 	}
   1023 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1024 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1025 		deallocated += fs->fs_bsize;
   1026 	}
   1027 	if (deallocated) {
   1028 #if defined(QUOTA) || defined(QUOTA2)
   1029 		/*
   1030 		 * Restore user's disk quota because allocation failed.
   1031 		 */
   1032 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1033 #endif
   1034 		ip->i_ffs2_blocks -= btodb(deallocated);
   1035 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1036 	}
   1037 
   1038 	return (error);
   1039 }
   1040