ffs_balloc.c revision 1.48.10.1
      1 /*	$NetBSD: ffs_balloc.c,v 1.48.10.1 2009/05/04 08:14:37 yamt Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.48.10.1 2009/05/04 08:14:37 yamt Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/kauth.h>
     57 #include <sys/fstrans.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     71     struct buf **);
     72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     73     struct buf **);
     74 
     75 /*
     76  * Balloc defines the structure of file system storage
     77  * by allocating the physical blocks on a device given
     78  * the inode and the logical block number in a file.
     79  */
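        /*
         * Illustrative sketch (an editorial example, not part of the original
         * source): a caller that has just extended a file might ask
         * ffs_balloc() to back the new range with real blocks roughly as
         * follows.  "vp", "cred", "offset" and "xfersize" are assumed to be
         * supplied by that caller; B_CLRBUF requests that the backing buffer
         * be returned, and passing a NULL buffer pointer allocates the
         * blocks without returning one.
         *
         *	struct buf *bp;
         *	int error;
         *
         *	error = ffs_balloc(vp, offset, xfersize, cred, B_CLRBUF, &bp);
         *	if (error)
         *		return error;
         *	... copy the new data into bp->b_data, then bdwrite(bp) ...
         */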
     80 
     81 int
     82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     83     struct buf **bpp)
     84 {
     85 	int error;
     86 
     87 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     88 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     89 	else
     90 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     91 
     92 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
     93 		brelse(*bpp, 0);
     94 
     95 	return error;
     96 }
     97 
     98 static int
     99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    100     int flags, struct buf **bpp)
    101 {
    102 	daddr_t lbn, lastlbn;
    103 	struct buf *bp, *nbp;
    104 	struct inode *ip = VTOI(vp);
    105 	struct fs *fs = ip->i_fs;
    106 	struct ufsmount *ump = ip->i_ump;
    107 	struct indir indirs[NIADDR + 2];
    108 	daddr_t newb, pref, nb;
    109 	int32_t *bap;	/* XXX ondisk32 */
    110 	int deallocated, osize, nsize, num, i, error;
    111 	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
    112 	int32_t *allocib;
    113 	int unwindidx = -1;
    114 #ifdef FFS_EI
    115 	const int needswap = UFS_FSNEEDSWAP(fs);
    116 #endif
    117 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    118 
    119 	lbn = lblkno(fs, off);
    120 	size = blkoff(fs, off) + size;
    121 	if (size > fs->fs_bsize)
    122 		panic("ffs_balloc: blk too big");
    123 	if (bpp != NULL) {
    124 		*bpp = NULL;
    125 	}
    126 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    127 
    128 	if (lbn < 0)
    129 		return (EFBIG);
    130 
    131 	/*
    132 	 * If the next write will extend the file into a new block,
     133 	 * and the file is currently composed of a fragment,
    134 	 * this fragment has to be extended to be a full block.
    135 	 */
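        	/*
        	 * Worked example (assuming fs_bsize = 8192 and fs_fsize = 1024):
        	 * a 3000-byte file ends in a 3072-byte fragment; if the new
        	 * write begins in a later block, that fragment is first grown
        	 * to a full 8192-byte block by the ffs_realloccg() call below.
        	 */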
    136 
    137 	lastlbn = lblkno(fs, ip->i_size);
    138 	if (lastlbn < NDADDR && lastlbn < lbn) {
    139 		nb = lastlbn;
    140 		osize = blksize(fs, ip, nb);
    141 		if (osize < fs->fs_bsize && osize > 0) {
    142 			mutex_enter(&ump->um_lock);
    143 			error = ffs_realloccg(ip, nb,
    144 				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
    145 					&ip->i_ffs1_db[0]),
    146 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    147 			if (error)
    148 				return (error);
    149 			ip->i_size = lblktosize(fs, nb + 1);
    150 			ip->i_ffs1_size = ip->i_size;
    151 			uvm_vnp_setsize(vp, ip->i_ffs1_size);
    152 			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
    153 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    154 			if (bpp && *bpp) {
    155 				if (flags & B_SYNC)
    156 					bwrite(*bpp);
    157 				else
    158 					bawrite(*bpp);
    159 			}
    160 		}
    161 	}
    162 
    163 	/*
    164 	 * The first NDADDR blocks are direct blocks
    165 	 */
    166 
    167 	if (lbn < NDADDR) {
    168 		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
    169 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    170 
    171 			/*
    172 			 * The block is an already-allocated direct block
    173 			 * and the file already extends past this block,
    174 			 * thus this must be a whole block.
    175 			 * Just read the block (if requested).
    176 			 */
    177 
    178 			if (bpp != NULL) {
    179 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    180 					      B_MODIFY, bpp);
    181 				if (error) {
    182 					brelse(*bpp, 0);
    183 					return (error);
    184 				}
    185 			}
    186 			return (0);
    187 		}
    188 		if (nb != 0) {
    189 
    190 			/*
     191 			 * Consider the need to reallocate a fragment.
    192 			 */
    193 
    194 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    195 			nsize = fragroundup(fs, size);
    196 			if (nsize <= osize) {
    197 
    198 				/*
    199 				 * The existing block is already
    200 				 * at least as big as we want.
    201 				 * Just read the block (if requested).
    202 				 */
    203 
    204 				if (bpp != NULL) {
    205 					error = bread(vp, lbn, osize, NOCRED,
    206 						      B_MODIFY, bpp);
    207 					if (error) {
    208 						brelse(*bpp, 0);
    209 						return (error);
    210 					}
    211 				}
    212 				return 0;
    213 			} else {
    214 
    215 				/*
     216 				 * The existing block is smaller than we want;
    217 				 * grow it.
    218 				 */
    219 				mutex_enter(&ump->um_lock);
    220 				error = ffs_realloccg(ip, lbn,
    221 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
    222 					&ip->i_ffs1_db[0]),
    223 				    osize, nsize, cred, bpp, &newb);
    224 				if (error)
    225 					return (error);
    226 			}
    227 		} else {
    228 
    229 			/*
     230 			 * The block was not previously allocated;
    231 			 * allocate a new block or fragment.
    232 			 */
    233 
    234 			if (ip->i_size < lblktosize(fs, lbn + 1))
    235 				nsize = fragroundup(fs, size);
    236 			else
    237 				nsize = fs->fs_bsize;
    238 			mutex_enter(&ump->um_lock);
    239 			error = ffs_alloc(ip, lbn,
    240 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
    241 				&ip->i_ffs1_db[0]),
    242 			    nsize, flags, cred, &newb);
    243 			if (error)
    244 				return (error);
    245 			if (bpp != NULL) {
    246 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    247 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    248 				if (error)
    249 					return error;
    250 			}
    251 		}
    252 		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
    253 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    254 		return (0);
    255 	}
    256 
    257 	/*
    258 	 * Determine the number of levels of indirection.
    259 	 */
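        	/*
        	 * For example, with 8 KB blocks a UFS1 indirect block holds
        	 * 8192 / 4 = 2048 entries, so (with NDADDR = 12) lbns 12..2059
        	 * need one level of indirection, lbns up to
        	 * 12 + 2048 + 2048 * 2048 - 1 need two, and larger ones need
        	 * three.  ufs_getlbns() records that path in indirs[] and the
        	 * number of entries in num.
        	 */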
    260 
    261 	pref = 0;
    262 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    263 		return (error);
    264 
    265 	/*
     266 	 * Fetch the first indirect block, allocating if necessary.
    267 	 */
    268 
    269 	--num;
    270 	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
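        	/*
        	 * allocib remembers which of the inode's indirect pointers we
        	 * fill in, and allociblk[] (walked via allocblk) records each
        	 * block allocated here, so the "fail:" path below can undo a
        	 * partial allocation.
        	 */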
    271 	allocib = NULL;
    272 	allocblk = allociblk;
    273 	if (nb == 0) {
    274 		mutex_enter(&ump->um_lock);
    275 		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
    276 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    277 		    flags | B_METAONLY, cred, &newb);
    278 		if (error)
    279 			goto fail;
    280 		nb = newb;
    281 		*allocblk++ = nb;
    282 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    283 		    fs->fs_bsize, true, &bp);
    284 		if (error)
    285 			goto fail;
    286 		/*
    287 		 * Write synchronously so that indirect blocks
    288 		 * never point at garbage.
    289 		 */
    290 		if ((error = bwrite(bp)) != 0)
    291 			goto fail;
    292 		unwindidx = 0;
    293 		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
    294 		*allocib = ufs_rw32(nb, needswap);
    295 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    296 	}
    297 
    298 	/*
    299 	 * Fetch through the indirect blocks, allocating as necessary.
    300 	 */
    301 
    302 	for (i = 1;;) {
    303 		error = bread(vp,
    304 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    305 		if (error) {
    306 			brelse(bp, 0);
    307 			goto fail;
    308 		}
    309 		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
    310 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
    311 		if (i == num)
    312 			break;
    313 		i++;
    314 		if (nb != 0) {
    315 			brelse(bp, 0);
    316 			continue;
    317 		}
    318 		if (fscow_run(bp, true) != 0) {
    319 			brelse(bp, 0);
    320 			goto fail;
    321 		}
    322 		mutex_enter(&ump->um_lock);
    323 		if (pref == 0)
    324 			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
    325 			    NULL);
    326 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    327 		    flags | B_METAONLY, cred, &newb);
    328 		if (error) {
    329 			brelse(bp, 0);
    330 			goto fail;
    331 		}
    332 		nb = newb;
    333 		*allocblk++ = nb;
    334 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    335 		    fs->fs_bsize, true, &nbp);
    336 		if (error) {
    337 			brelse(bp, 0);
    338 			goto fail;
    339 		}
    340 		/*
    341 		 * Write synchronously so that indirect blocks
    342 		 * never point at garbage.
    343 		 */
    344 		if ((error = bwrite(nbp)) != 0) {
    345 			brelse(bp, 0);
    346 			goto fail;
    347 		}
    348 		if (unwindidx < 0)
    349 			unwindidx = i - 1;
    350 		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
    351 
    352 		/*
    353 		 * If required, write synchronously, otherwise use
    354 		 * delayed write.
    355 		 */
    356 
    357 		if (flags & B_SYNC) {
    358 			bwrite(bp);
    359 		} else {
    360 			bdwrite(bp);
    361 		}
    362 	}
    363 
    364 	if (flags & B_METAONLY) {
    365 		KASSERT(bpp != NULL);
    366 		*bpp = bp;
    367 		return (0);
    368 	}
    369 
    370 	/*
    371 	 * Get the data block, allocating if necessary.
    372 	 */
    373 
    374 	if (nb == 0) {
    375 		if (fscow_run(bp, true) != 0) {
    376 			brelse(bp, 0);
    377 			goto fail;
    378 		}
    379 		mutex_enter(&ump->um_lock);
    380 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
    381 		    &bap[0]);
    382 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
    383 		    &newb);
    384 		if (error) {
    385 			brelse(bp, 0);
    386 			goto fail;
    387 		}
    388 		nb = newb;
    389 		*allocblk++ = nb;
    390 		if (bpp != NULL) {
    391 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    392 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    393 			if (error) {
    394 				brelse(bp, 0);
    395 				goto fail;
    396 			}
    397 		}
    398 		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
    399 		if (allocib == NULL && unwindidx < 0) {
    400 			unwindidx = i - 1;
    401 		}
    402 
    403 		/*
    404 		 * If required, write synchronously, otherwise use
    405 		 * delayed write.
    406 		 */
    407 
    408 		if (flags & B_SYNC) {
    409 			bwrite(bp);
    410 		} else {
    411 			bdwrite(bp);
    412 		}
    413 		return (0);
    414 	}
    415 	brelse(bp, 0);
    416 	if (bpp != NULL) {
    417 		if (flags & B_CLRBUF) {
    418 			error = bread(vp, lbn, (int)fs->fs_bsize,
    419 			    NOCRED, B_MODIFY, &nbp);
    420 			if (error) {
    421 				brelse(nbp, 0);
    422 				goto fail;
    423 			}
    424 		} else {
    425 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    426 			    fs->fs_bsize, true, &nbp);
    427 			if (error)
    428 				goto fail;
    429 		}
    430 		*bpp = nbp;
    431 	}
    432 	return (0);
    433 
    434 fail:
    435 	/*
    436 	 * If we have failed part way through block allocation, we
    437 	 * have to deallocate any indirect blocks that we have allocated.
    438 	 */
    439 
    440 	if (unwindidx >= 0) {
    441 
    442 		/*
    443 		 * First write out any buffers we've created to resolve their
    444 		 * softdeps.  This must be done in reverse order of creation
    445 		 * so that we resolve the dependencies in one pass.
    446 		 * Write the cylinder group buffers for these buffers too.
    447 		 */
    448 
    449 		for (i = num; i >= unwindidx; i--) {
    450 			if (i == 0) {
    451 				break;
    452 			}
    453 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    454 			    fs->fs_bsize, false, &bp) != 0)
    455 				continue;
    456 			if (bp->b_oflags & BO_DELWRI) {
    457 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    458 				    dbtofsb(fs, bp->b_blkno))));
    459 				bwrite(bp);
    460 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    461 				    fs->fs_cgsize, false, &bp) != 0)
    462 					continue;
    463 				if (bp->b_oflags & BO_DELWRI) {
    464 					bwrite(bp);
    465 				} else {
    466 					brelse(bp, BC_INVAL);
    467 				}
    468 			} else {
    469 				brelse(bp, BC_INVAL);
    470 			}
    471 		}
    472 
    473 		/*
    474 		 * Undo the partial allocation.
    475 		 */
    476 		if (unwindidx == 0) {
    477 			*allocib = 0;
    478 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    479 		} else {
    480 			int r;
    481 
    482 			r = bread(vp, indirs[unwindidx].in_lbn,
    483 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
    484 			if (r) {
    485 				panic("Could not unwind indirect block, error %d", r);
    486 				brelse(bp, 0);
    487 			} else {
    488 				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
    489 				bap[indirs[unwindidx].in_off] = 0;
    490 				bwrite(bp);
    491 			}
    492 		}
    493 		for (i = unwindidx + 1; i <= num; i++) {
    494 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    495 			    fs->fs_bsize, false, &bp) == 0)
    496 				brelse(bp, BC_INVAL);
    497 		}
    498 	}
    499 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
    500 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
    501 		deallocated += fs->fs_bsize;
    502 	}
    503 	if (deallocated) {
    504 #ifdef QUOTA
    505 		/*
    506 		 * Restore user's disk quota because allocation failed.
    507 		 */
    508 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
    509 #endif
    510 		ip->i_ffs1_blocks -= btodb(deallocated);
    511 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    512 	}
    513 	return (error);
    514 }
    515 
    516 static int
    517 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    518     int flags, struct buf **bpp)
    519 {
    520 	daddr_t lbn, lastlbn;
    521 	struct buf *bp, *nbp;
    522 	struct inode *ip = VTOI(vp);
    523 	struct fs *fs = ip->i_fs;
    524 	struct ufsmount *ump = ip->i_ump;
    525 	struct indir indirs[NIADDR + 2];
    526 	daddr_t newb, pref, nb;
    527 	int64_t *bap;
    528 	int deallocated, osize, nsize, num, i, error;
    529 	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
    530 	int64_t *allocib;
    531 	int unwindidx = -1;
    532 #ifdef FFS_EI
    533 	const int needswap = UFS_FSNEEDSWAP(fs);
    534 #endif
    535 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    536 
    537 	lbn = lblkno(fs, off);
    538 	size = blkoff(fs, off) + size;
    539 	if (size > fs->fs_bsize)
    540 		panic("ffs_balloc: blk too big");
    541 	if (bpp != NULL) {
    542 		*bpp = NULL;
    543 	}
    544 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    545 
    546 	if (lbn < 0)
    547 		return (EFBIG);
    548 
    549 #ifdef notyet
    550 	/*
    551 	 * Check for allocating external data.
    552 	 */
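        	/*
        	 * Note: this disabled block appears to be carried over from the
        	 * FreeBSD UFS2 extended-attribute ("external data") support; it
        	 * refers to names such as dp, di_extb and IO_EXT that are not
        	 * set up elsewhere in this file, hence the "#ifdef notyet".
        	 */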
    553 	if (flags & IO_EXT) {
    554 		if (lbn >= NXADDR)
    555 			return (EFBIG);
    556 		/*
    557 		 * If the next write will extend the data into a new block,
    558 		 * and the data is currently composed of a fragment
     559 		 * and the data is currently composed of a fragment,
    560 		 */
    561 		lastlbn = lblkno(fs, dp->di_extsize);
    562 		if (lastlbn < lbn) {
    563 			nb = lastlbn;
    564 			osize = sblksize(fs, dp->di_extsize, nb);
    565 			if (osize < fs->fs_bsize && osize > 0) {
    566 				mutex_enter(&ump->um_lock);
    567 				error = ffs_realloccg(ip, -1 - nb,
    568 				    dp->di_extb[nb],
    569 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    570 					flags, &dp->di_extb[0]),
    571 				    osize,
    572 				    (int)fs->fs_bsize, cred, &bp);
    573 				if (error)
    574 					return (error);
    575 				dp->di_extsize = smalllblktosize(fs, nb + 1);
    576 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
    577 				bp->b_xflags |= BX_ALTDATA;
    578 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    579 				if (flags & IO_SYNC)
    580 					bwrite(bp);
    581 				else
    582 					bawrite(bp);
    583 			}
    584 		}
    585 		/*
    586 		 * All blocks are direct blocks
    587 		 */
    588 		if (flags & BA_METAONLY)
    589 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
    590 		nb = dp->di_extb[lbn];
    591 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
    592 			error = bread(vp, -1 - lbn, fs->fs_bsize,
    593 			    NOCRED, 0, &bp);
    594 			if (error) {
    595 				brelse(bp, 0);
    596 				return (error);
    597 			}
    598 			mutex_enter(&bp->b_interlock);
    599 			bp->b_blkno = fsbtodb(fs, nb);
    600 			bp->b_xflags |= BX_ALTDATA;
    601 			mutex_exit(&bp->b_interlock);
    602 			*bpp = bp;
    603 			return (0);
    604 		}
    605 		if (nb != 0) {
    606 			/*
     607 			 * Consider the need to reallocate a fragment.
    608 			 */
    609 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
    610 			nsize = fragroundup(fs, size);
    611 			if (nsize <= osize) {
    612 				error = bread(vp, -1 - lbn, osize,
    613 				    NOCRED, 0, &bp);
    614 				if (error) {
    615 					brelse(bp, 0);
    616 					return (error);
    617 				}
    618 				mutex_enter(&bp->b_interlock);
    619 				bp->b_blkno = fsbtodb(fs, nb);
    620 				bp->b_xflags |= BX_ALTDATA;
    621 				mutex_exit(&bp->b_interlock);
    622 			} else {
    623 				mutex_enter(&ump->um_lock);
    624 				error = ffs_realloccg(ip, -1 - lbn,
    625 				    dp->di_extb[lbn],
    626 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    627 				        &dp->di_extb[0]),
    628 				    osize, nsize, cred, &bp);
    629 				if (error)
    630 					return (error);
    631 				bp->b_xflags |= BX_ALTDATA;
    632 			}
    633 		} else {
    634 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
    635 				nsize = fragroundup(fs, size);
    636 			else
    637 				nsize = fs->fs_bsize;
    638 			mutex_enter(&ump->um_lock);
    639 			error = ffs_alloc(ip, lbn,
    640 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    641 			       &dp->di_extb[0]),
    642 			   nsize, flags, cred, &newb);
    643 			if (error)
    644 				return (error);
    645 			error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
    646 			    nsize, (flags & BA_CLRBUF) != 0, &bp);
    647 			if (error)
    648 				return error;
    649 			bp->b_xflags |= BX_ALTDATA;
    650 		}
    651 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
    652 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    653 		*bpp = bp;
    654 		return (0);
    655 	}
    656 #endif
    657 	/*
    658 	 * If the next write will extend the file into a new block,
     659 	 * and the file is currently composed of a fragment,
    660 	 * this fragment has to be extended to be a full block.
    661 	 */
    662 
    663 	lastlbn = lblkno(fs, ip->i_size);
    664 	if (lastlbn < NDADDR && lastlbn < lbn) {
    665 		nb = lastlbn;
    666 		osize = blksize(fs, ip, nb);
    667 		if (osize < fs->fs_bsize && osize > 0) {
    668 			mutex_enter(&ump->um_lock);
    669 			error = ffs_realloccg(ip, nb,
    670 				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
    671 					&ip->i_ffs2_db[0]),
    672 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    673 			if (error)
    674 				return (error);
    675 			ip->i_size = lblktosize(fs, nb + 1);
    676 			ip->i_ffs2_size = ip->i_size;
    677 			uvm_vnp_setsize(vp, ip->i_size);
    678 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    679 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    680 			if (bpp) {
    681 				if (flags & B_SYNC)
    682 					bwrite(*bpp);
    683 				else
    684 					bawrite(*bpp);
    685 			}
    686 		}
    687 	}
    688 
    689 	/*
    690 	 * The first NDADDR blocks are direct blocks
    691 	 */
    692 
    693 	if (lbn < NDADDR) {
    694 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    695 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    696 
    697 			/*
    698 			 * The block is an already-allocated direct block
    699 			 * and the file already extends past this block,
    700 			 * thus this must be a whole block.
    701 			 * Just read the block (if requested).
    702 			 */
    703 
    704 			if (bpp != NULL) {
    705 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    706 					      B_MODIFY, bpp);
    707 				if (error) {
    708 					brelse(*bpp, 0);
    709 					return (error);
    710 				}
    711 			}
    712 			return (0);
    713 		}
    714 		if (nb != 0) {
    715 
    716 			/*
     717 			 * Consider the need to reallocate a fragment.
    718 			 */
    719 
    720 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    721 			nsize = fragroundup(fs, size);
    722 			if (nsize <= osize) {
    723 
    724 				/*
    725 				 * The existing block is already
    726 				 * at least as big as we want.
    727 				 * Just read the block (if requested).
    728 				 */
    729 
    730 				if (bpp != NULL) {
    731 					error = bread(vp, lbn, osize, NOCRED,
    732 						      B_MODIFY, bpp);
    733 					if (error) {
    734 						brelse(*bpp, 0);
    735 						return (error);
    736 					}
    737 				}
    738 				return 0;
    739 			} else {
    740 
    741 				/*
     742 				 * The existing block is smaller than we want;
    743 				 * grow it.
    744 				 */
    745 				mutex_enter(&ump->um_lock);
    746 				error = ffs_realloccg(ip, lbn,
    747 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    748 					&ip->i_ffs2_db[0]),
    749 				    osize, nsize, cred, bpp, &newb);
    750 				if (error)
    751 					return (error);
    752 			}
    753 		} else {
    754 
    755 			/*
     756 			 * The block was not previously allocated;
    757 			 * allocate a new block or fragment.
    758 			 */
    759 
    760 			if (ip->i_size < lblktosize(fs, lbn + 1))
    761 				nsize = fragroundup(fs, size);
    762 			else
    763 				nsize = fs->fs_bsize;
    764 			mutex_enter(&ump->um_lock);
    765 			error = ffs_alloc(ip, lbn,
    766 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    767 				&ip->i_ffs2_db[0]),
    768 			    nsize, flags, cred, &newb);
    769 			if (error)
    770 				return (error);
    771 			if (bpp != NULL) {
    772 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    773 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    774 				if (error)
    775 					return error;
    776 			}
    777 		}
    778 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    779 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    780 		return (0);
    781 	}
    782 
    783 	/*
    784 	 * Determine the number of levels of indirection.
    785 	 */
    786 
    787 	pref = 0;
    788 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    789 		return (error);
    790 
    791 	/*
     792 	 * Fetch the first indirect block, allocating if necessary.
    793 	 */
    794 
    795 	--num;
    796 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    797 	allocib = NULL;
    798 	allocblk = allociblk;
    799 	if (nb == 0) {
    800 		mutex_enter(&ump->um_lock);
    801 		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
    802 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    803 		    flags | B_METAONLY, cred, &newb);
    804 		if (error)
    805 			goto fail;
    806 		nb = newb;
    807 		*allocblk++ = nb;
    808 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    809 		    fs->fs_bsize, true, &bp);
    810 		if (error)
    811 			goto fail;
    812 		/*
    813 		 * Write synchronously so that indirect blocks
    814 		 * never point at garbage.
    815 		 */
    816 		if ((error = bwrite(bp)) != 0)
    817 			goto fail;
    818 		unwindidx = 0;
    819 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    820 		*allocib = ufs_rw64(nb, needswap);
    821 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    822 	}
    823 
    824 	/*
    825 	 * Fetch through the indirect blocks, allocating as necessary.
    826 	 */
    827 
    828 	for (i = 1;;) {
    829 		error = bread(vp,
    830 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    831 		if (error) {
    832 			brelse(bp, 0);
    833 			goto fail;
    834 		}
    835 		bap = (int64_t *)bp->b_data;
    836 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    837 		if (i == num)
    838 			break;
    839 		i++;
    840 		if (nb != 0) {
    841 			brelse(bp, 0);
    842 			continue;
    843 		}
    844 		if (fscow_run(bp, true) != 0) {
    845 			brelse(bp, 0);
    846 			goto fail;
    847 		}
    848 		mutex_enter(&ump->um_lock);
    849 		if (pref == 0)
    850 			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
    851 			    NULL);
    852 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    853 		    flags | B_METAONLY, cred, &newb);
    854 		if (error) {
    855 			brelse(bp, 0);
    856 			goto fail;
    857 		}
    858 		nb = newb;
    859 		*allocblk++ = nb;
    860 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    861 		    fs->fs_bsize, true, &nbp);
    862 		if (error) {
    863 			brelse(bp, 0);
    864 			goto fail;
    865 		}
    866 		/*
    867 		 * Write synchronously so that indirect blocks
    868 		 * never point at garbage.
    869 		 */
    870 		if ((error = bwrite(nbp)) != 0) {
    871 			brelse(bp, 0);
    872 			goto fail;
    873 		}
    874 		if (unwindidx < 0)
    875 			unwindidx = i - 1;
    876 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    877 
    878 		/*
    879 		 * If required, write synchronously, otherwise use
    880 		 * delayed write.
    881 		 */
    882 
    883 		if (flags & B_SYNC) {
    884 			bwrite(bp);
    885 		} else {
    886 			bdwrite(bp);
    887 		}
    888 	}
    889 
    890 	if (flags & B_METAONLY) {
    891 		KASSERT(bpp != NULL);
    892 		*bpp = bp;
    893 		return (0);
    894 	}
    895 
    896 	/*
    897 	 * Get the data block, allocating if necessary.
    898 	 */
    899 
    900 	if (nb == 0) {
    901 		if (fscow_run(bp, true) != 0) {
    902 			brelse(bp, 0);
    903 			goto fail;
    904 		}
    905 		mutex_enter(&ump->um_lock);
    906 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
    907 		    &bap[0]);
    908 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
    909 		    &newb);
    910 		if (error) {
    911 			brelse(bp, 0);
    912 			goto fail;
    913 		}
    914 		nb = newb;
    915 		*allocblk++ = nb;
    916 		if (bpp != NULL) {
    917 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    918 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    919 			if (error) {
    920 				brelse(bp, 0);
    921 				goto fail;
    922 			}
    923 		}
    924 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
    925 		if (allocib == NULL && unwindidx < 0) {
    926 			unwindidx = i - 1;
    927 		}
    928 
    929 		/*
    930 		 * If required, write synchronously, otherwise use
    931 		 * delayed write.
    932 		 */
    933 
    934 		if (flags & B_SYNC) {
    935 			bwrite(bp);
    936 		} else {
    937 			bdwrite(bp);
    938 		}
    939 		return (0);
    940 	}
    941 	brelse(bp, 0);
    942 	if (bpp != NULL) {
    943 		if (flags & B_CLRBUF) {
    944 			error = bread(vp, lbn, (int)fs->fs_bsize,
    945 			    NOCRED, B_MODIFY, &nbp);
    946 			if (error) {
    947 				brelse(nbp, 0);
    948 				goto fail;
    949 			}
    950 		} else {
    951 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    952 			    fs->fs_bsize, true, &nbp);
    953 			if (error)
    954 				goto fail;
    955 		}
    956 		*bpp = nbp;
    957 	}
    958 	return (0);
    959 
    960 fail:
    961 	/*
    962 	 * If we have failed part way through block allocation, we
    963 	 * have to deallocate any indirect blocks that we have allocated.
    964 	 */
    965 
    966 	if (unwindidx >= 0) {
    967 
    968 		/*
    969 		 * First write out any buffers we've created to resolve their
    970 		 * softdeps.  This must be done in reverse order of creation
    971 		 * so that we resolve the dependencies in one pass.
    972 		 * Write the cylinder group buffers for these buffers too.
    973 		 */
    974 
    975 		for (i = num; i >= unwindidx; i--) {
    976 			if (i == 0) {
    977 				break;
    978 			}
    979 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    980 			    fs->fs_bsize, false, &bp) != 0)
    981 				continue;
    982 			if (bp->b_oflags & BO_DELWRI) {
    983 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    984 				    dbtofsb(fs, bp->b_blkno))));
    985 				bwrite(bp);
    986 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    987 				    fs->fs_cgsize, false, &bp) != 0)
    988 					continue;
    989 				if (bp->b_oflags & BO_DELWRI) {
    990 					bwrite(bp);
    991 				} else {
    992 					brelse(bp, BC_INVAL);
    993 				}
    994 			} else {
    995 				brelse(bp, BC_INVAL);
    996 			}
    997 		}
    998 
    999 		/*
   1000 		 * Now that any dependencies that we created have been
   1001 		 * resolved, we can undo the partial allocation.
   1002 		 */
   1003 
   1004 		if (unwindidx == 0) {
   1005 			*allocib = 0;
   1006 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1007 		} else {
   1008 			int r;
   1009 
   1010 			r = bread(vp, indirs[unwindidx].in_lbn,
   1011 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
   1012 			if (r) {
   1013 				panic("Could not unwind indirect block, error %d", r);
   1014 				brelse(bp, 0);
   1015 			} else {
   1016 				bap = (int64_t *)bp->b_data;
   1017 				bap[indirs[unwindidx].in_off] = 0;
   1018 				bwrite(bp);
   1019 			}
   1020 		}
   1021 		for (i = unwindidx + 1; i <= num; i++) {
   1022 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1023 			    fs->fs_bsize, false, &bp) == 0)
   1024 				brelse(bp, BC_INVAL);
   1025 		}
   1026 	}
   1027 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1028 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1029 		deallocated += fs->fs_bsize;
   1030 	}
   1031 	if (deallocated) {
   1032 #ifdef QUOTA
   1033 		/*
   1034 		 * Restore user's disk quota because allocation failed.
   1035 		 */
   1036 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1037 #endif
   1038 		ip->i_ffs2_blocks -= btodb(deallocated);
   1039 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1040 	}
   1041 
   1042 	return (error);
   1043 }
   1044