ffs_balloc.c revision 1.48.6.1
      1 /*	$NetBSD: ffs_balloc.c,v 1.48.6.1 2008/06/02 13:24:35 mjf Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.48.6.1 2008/06/02 13:24:35 mjf Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/kauth.h>
     57 #include <sys/fstrans.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_getblk(struct vnode *, daddr_t, daddr_t, int, bool, buf_t **);
     71 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     72     struct buf **);
     73 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     74     struct buf **);
     75 
     76 /*
     77  * Balloc defines the structure of file system storage
     78  * by allocating the physical blocks on a device given
     79  * the inode and the logical block number in a file.
     80  */
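/*
 * A minimal usage sketch (hypothetical caller, not code from this file):
 * a write path that needs backing store for the block under a file offset
 * could call ffs_balloc() roughly as follows, passing B_SYNC for
 * synchronous writes and B_CLRBUF when the rest of the block must be
 * zeroed.  "offset", "xfersize" and "ioflag" stand in for caller state.
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = ffs_balloc(vp, offset, xfersize, cred,
 *	    (ioflag & IO_SYNC) ? B_SYNC : 0, &bp);
 *	if (error)
 *		return error;
 *	... copy the new data into bp->b_data ...
 *	if (ioflag & IO_SYNC)
 *		bwrite(bp);
 *	else
 *		bdwrite(bp);
 */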
     81 
     82 int
     83 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     84     struct buf **bpp)
     85 {
     86 	int error;
     87 
     88 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     89 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     90 	else
     91 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     92 
     93 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
     94 		brelse(*bpp, 0);
     95 
     96 	return error;
     97 }
     98 
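/*
 * ffs_getblk: common helper to obtain a buffer for logical block lblkno
 * of vp, point it at physical block blkno, optionally clear its contents,
 * and run the fscow copy-on-write hook; on a hook failure the buffer is
 * released and invalidated and the error returned.
 */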
     99 static int
    100 ffs_getblk(struct vnode *vp, daddr_t lblkno, daddr_t blkno, int size,
    101     bool clearbuf, buf_t **bpp)
    102 {
    103 	int error;
    104 
    105 	if ((*bpp = getblk(vp, lblkno, size, 0, 0)) == NULL)
    106 		return ENOMEM;
    107 	(*bpp)->b_blkno = blkno;
    108 	if (clearbuf)
    109 		clrbuf(*bpp);
    110 	if ((error = fscow_run(*bpp, false)) != 0)
    111 		brelse(*bpp, BC_INVAL);
    112 	return error;
    113 }
    114 
    115 static int
    116 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    117     int flags, struct buf **bpp)
    118 {
    119 	daddr_t lbn, lastlbn;
    120 	struct buf *bp, *nbp;
    121 	struct inode *ip = VTOI(vp);
    122 	struct fs *fs = ip->i_fs;
    123 	struct ufsmount *ump = ip->i_ump;
    124 	struct indir indirs[NIADDR + 2];
    125 	daddr_t newb, pref, nb;
    126 	int32_t *bap;	/* XXX ondisk32 */
    127 	int deallocated, osize, nsize, num, i, error;
    128 	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
    129 	int32_t *allocib;
    130 	int unwindidx = -1;
    131 #ifdef FFS_EI
    132 	const int needswap = UFS_FSNEEDSWAP(fs);
    133 #endif
    134 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    135 
    136 	lbn = lblkno(fs, off);
    137 	size = blkoff(fs, off) + size;
    138 	if (size > fs->fs_bsize)
    139 		panic("ffs_balloc: blk too big");
    140 	if (bpp != NULL) {
    141 		*bpp = NULL;
    142 	}
    143 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    144 
    145 	if (lbn < 0)
    146 		return (EFBIG);
    147 
    148 	/*
    149 	 * If the next write will extend the file into a new block,
     150 	 * and the file is currently composed of a fragment,
    151 	 * this fragment has to be extended to be a full block.
    152 	 */
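	/*
	 * Worked example (assuming an 8 kB block, 1 kB fragment layout):
	 * a 3 kB file stores its data in three 1 kB fragments of direct
	 * block 0.  A write at offset 10 kB lands in direct block 1, so
	 * ffs_realloccg() below first grows the 3 kB fragment run of
	 * block 0 to a full 8 kB block before the new block is allocated.
	 */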
    153 
    154 	lastlbn = lblkno(fs, ip->i_size);
    155 	if (lastlbn < NDADDR && lastlbn < lbn) {
    156 		nb = lastlbn;
    157 		osize = blksize(fs, ip, nb);
    158 		if (osize < fs->fs_bsize && osize > 0) {
    159 			mutex_enter(&ump->um_lock);
    160 			error = ffs_realloccg(ip, nb,
    161 				    ffs_blkpref_ufs1(ip, lastlbn, nb,
    162 					&ip->i_ffs1_db[0]),
    163 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    164 			if (error)
    165 				return (error);
    166 			if (DOINGSOFTDEP(vp))
    167 				softdep_setup_allocdirect(ip, nb, newb,
    168 				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
    169 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
    170 			ip->i_size = lblktosize(fs, nb + 1);
    171 			ip->i_ffs1_size = ip->i_size;
    172 			uvm_vnp_setsize(vp, ip->i_ffs1_size);
    173 			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
    174 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    175 			if (bpp && *bpp) {
    176 				if (flags & B_SYNC)
    177 					bwrite(*bpp);
    178 				else
    179 					bawrite(*bpp);
    180 			}
    181 		}
    182 	}
    183 
    184 	/*
    185 	 * The first NDADDR blocks are direct blocks
    186 	 */
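	/*
	 * With the usual NDADDR of 12, this covers logical blocks 0-11,
	 * addressed straight from the inode's i_ffs1_db[] array; anything
	 * beyond that falls through to the indirect-block code below.
	 */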
    187 
    188 	if (lbn < NDADDR) {
    189 		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
    190 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    191 
    192 			/*
    193 			 * The block is an already-allocated direct block
    194 			 * and the file already extends past this block,
    195 			 * thus this must be a whole block.
    196 			 * Just read the block (if requested).
    197 			 */
    198 
    199 			if (bpp != NULL) {
    200 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    201 					      B_MODIFY, bpp);
    202 				if (error) {
    203 					brelse(*bpp, 0);
    204 					return (error);
    205 				}
    206 			}
    207 			return (0);
    208 		}
    209 		if (nb != 0) {
    210 
    211 			/*
    212 			 * Consider need to reallocate a fragment.
    213 			 */
    214 
    215 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    216 			nsize = fragroundup(fs, size);
    217 			if (nsize <= osize) {
    218 
    219 				/*
    220 				 * The existing block is already
    221 				 * at least as big as we want.
    222 				 * Just read the block (if requested).
    223 				 */
    224 
    225 				if (bpp != NULL) {
    226 					error = bread(vp, lbn, osize, NOCRED,
    227 						      B_MODIFY, bpp);
    228 					if (error) {
    229 						brelse(*bpp, 0);
    230 						return (error);
    231 					}
    232 				}
    233 				return 0;
    234 			} else {
    235 
    236 				/*
    237 				 * The existing block is smaller than we want,
    238 				 * grow it.
    239 				 */
    240 				mutex_enter(&ump->um_lock);
    241 				error = ffs_realloccg(ip, lbn,
    242 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
    243 					&ip->i_ffs1_db[0]), osize, nsize, cred,
    244 					bpp, &newb);
    245 				if (error)
    246 					return (error);
    247 				if (DOINGSOFTDEP(vp))
    248 					softdep_setup_allocdirect(ip, lbn,
    249 					    newb, nb, nsize, osize,
    250 					    bpp ? *bpp : NULL);
    251 			}
    252 		} else {
    253 
    254 			/*
     255 			 * The block was not previously allocated;
    256 			 * allocate a new block or fragment.
    257 			 */
    258 
    259 			if (ip->i_size < lblktosize(fs, lbn + 1))
    260 				nsize = fragroundup(fs, size);
    261 			else
    262 				nsize = fs->fs_bsize;
    263 			mutex_enter(&ump->um_lock);
    264 			error = ffs_alloc(ip, lbn,
    265 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
    266 				&ip->i_ffs1_db[0]),
    267 				nsize, cred, &newb);
    268 			if (error)
    269 				return (error);
    270 			if (bpp != NULL) {
    271 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    272 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    273 				if (error)
    274 					return error;
    275 			}
    276 			if (DOINGSOFTDEP(vp)) {
    277 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    278 				    nsize, 0, bpp ? *bpp : NULL);
    279 			}
    280 		}
    281 		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
    282 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    283 		return (0);
    284 	}
    285 
    286 	/*
    287 	 * Determine the number of levels of indirection.
    288 	 */
    289 
    290 	pref = 0;
    291 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    292 		return (error);
    293 
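	/*
	 * indirs[] now describes the path from the inode to lbn:
	 * indirs[0].in_off indexes the inode's indirect-block array, each
	 * following entry carries the logical block number of one indirect
	 * block plus the offset of the pointer to follow within it, and
	 * the last entry's offset locates the data block pointer itself.
	 * After the decrement below, num is the number of indirect levels.
	 */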
    294 	/*
     295 	 * Fetch the first indirect block, allocating if necessary.
    296 	 */
    297 
    298 	--num;
    299 	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
    300 	allocib = NULL;
    301 	allocblk = allociblk;
    302 	if (nb == 0) {
    303 		mutex_enter(&ump->um_lock);
    304 		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
    305 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    306 		    &newb);
    307 		if (error)
    308 			goto fail;
    309 		nb = newb;
    310 		*allocblk++ = nb;
    311 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    312 		    fs->fs_bsize, true, &bp);
    313 		if (error)
    314 			goto fail;
    315 		if (DOINGSOFTDEP(vp)) {
    316 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    317 			    newb, 0, fs->fs_bsize, 0, bp);
    318 			bdwrite(bp);
    319 		} else {
    320 
    321 			/*
    322 			 * Write synchronously so that indirect blocks
    323 			 * never point at garbage.
    324 			 */
    325 
    326 			if ((error = bwrite(bp)) != 0)
    327 				goto fail;
    328 		}
    329 		unwindidx = 0;
    330 		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
    331 		*allocib = ufs_rw32(nb, needswap);
    332 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    333 	}
    334 
    335 	/*
    336 	 * Fetch through the indirect blocks, allocating as necessary.
    337 	 */
    338 
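	/*
	 * Walk indirs[1..num]: read each indirect block in turn and, where
	 * a pointer along the path is missing, allocate the next-level
	 * indirect block, write it out (or record its softdep) and install
	 * its address in the parent.  The loop exits with bp holding the
	 * last indirect block and nb the data block address found there,
	 * or 0 if none is allocated yet.
	 */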
    339 	for (i = 1;;) {
    340 		error = bread(vp,
    341 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    342 		if (error) {
    343 			brelse(bp, 0);
    344 			goto fail;
    345 		}
    346 		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
    347 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
    348 		if (i == num)
    349 			break;
    350 		i++;
    351 		if (nb != 0) {
    352 			brelse(bp, 0);
    353 			continue;
    354 		}
    355 		if (fscow_run(bp, true) != 0) {
    356 			brelse(bp, 0);
    357 			goto fail;
    358 		}
    359 		mutex_enter(&ump->um_lock);
    360 		if (pref == 0)
    361 			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
    362 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    363 		    &newb);
    364 		if (error) {
    365 			brelse(bp, 0);
    366 			goto fail;
    367 		}
    368 		nb = newb;
    369 		*allocblk++ = nb;
    370 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    371 		    fs->fs_bsize, true, &nbp);
    372 		if (error) {
    373 			brelse(bp, 0);
    374 			goto fail;
    375 		}
    376 		if (DOINGSOFTDEP(vp)) {
    377 			softdep_setup_allocindir_meta(nbp, ip, bp,
    378 			    indirs[i - 1].in_off, nb);
    379 			bdwrite(nbp);
    380 		} else {
    381 
    382 			/*
    383 			 * Write synchronously so that indirect blocks
    384 			 * never point at garbage.
    385 			 */
    386 
    387 			if ((error = bwrite(nbp)) != 0) {
    388 				brelse(bp, 0);
    389 				goto fail;
    390 			}
    391 		}
    392 		if (unwindidx < 0)
    393 			unwindidx = i - 1;
    394 		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
    395 
    396 		/*
    397 		 * If required, write synchronously, otherwise use
    398 		 * delayed write.
    399 		 */
    400 
    401 		if (flags & B_SYNC) {
    402 			bwrite(bp);
    403 		} else {
    404 			bdwrite(bp);
    405 		}
    406 	}
    407 
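	/*
	 * B_METAONLY: the caller wants the last indirect block itself, not
	 * the data block it points to, so hand bp back as the result.
	 */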
    408 	if (flags & B_METAONLY) {
    409 		KASSERT(bpp != NULL);
    410 		*bpp = bp;
    411 		return (0);
    412 	}
    413 
    414 	/*
    415 	 * Get the data block, allocating if necessary.
    416 	 */
    417 
    418 	if (nb == 0) {
    419 		if (fscow_run(bp, true) != 0) {
    420 			brelse(bp, 0);
    421 			goto fail;
    422 		}
    423 		mutex_enter(&ump->um_lock);
    424 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
    425 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    426 		    &newb);
    427 		if (error) {
    428 			brelse(bp, 0);
    429 			goto fail;
    430 		}
    431 		nb = newb;
    432 		*allocblk++ = nb;
    433 		if (bpp != NULL) {
    434 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    435 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    436 			if (error) {
    437 				brelse(bp, 0);
    438 				goto fail;
    439 			}
    440 		}
    441 		if (DOINGSOFTDEP(vp))
    442 			softdep_setup_allocindir_page(ip, lbn, bp,
    443 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
    444 		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
    445 		if (allocib == NULL && unwindidx < 0) {
    446 			unwindidx = i - 1;
    447 		}
    448 
    449 		/*
    450 		 * If required, write synchronously, otherwise use
    451 		 * delayed write.
    452 		 */
    453 
    454 		if (flags & B_SYNC) {
    455 			bwrite(bp);
    456 		} else {
    457 			bdwrite(bp);
    458 		}
    459 		return (0);
    460 	}
    461 	brelse(bp, 0);
    462 	if (bpp != NULL) {
    463 		if (flags & B_CLRBUF) {
    464 			error = bread(vp, lbn, (int)fs->fs_bsize,
    465 			    NOCRED, B_MODIFY, &nbp);
    466 			if (error) {
    467 				brelse(nbp, 0);
    468 				goto fail;
    469 			}
    470 		} else {
    471 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    472 			    fs->fs_bsize, true, &nbp);
    473 			if (error)
    474 				goto fail;
    475 		}
    476 		*bpp = nbp;
    477 	}
    478 	return (0);
    479 
    480 fail:
    481 	/*
    482 	 * If we have failed part way through block allocation, we
    483 	 * have to deallocate any indirect blocks that we have allocated.
    484 	 */
    485 
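	/*
	 * unwindidx records how far the new blocks were linked into the
	 * file: 0 means a new indirect block was hung off the inode itself
	 * (allocib points at that slot), a positive value identifies the
	 * indirs[] entry whose indirect block had a pointer installed, and
	 * -1 means nothing was linked in yet, so only the blocks recorded
	 * in allociblk[] have to be freed below.
	 */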
    486 	if (unwindidx >= 0) {
    487 
    488 		/*
    489 		 * First write out any buffers we've created to resolve their
    490 		 * softdeps.  This must be done in reverse order of creation
    491 		 * so that we resolve the dependencies in one pass.
    492 		 * Write the cylinder group buffers for these buffers too.
    493 		 */
    494 
    495 		for (i = num; i >= unwindidx; i--) {
    496 			if (i == 0) {
    497 				break;
    498 			}
    499 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
    500 			    0);
    501 			if (bp->b_oflags & BO_DELWRI) {
    502 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    503 				    dbtofsb(fs, bp->b_blkno))));
    504 				bwrite(bp);
    505 				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
    506 				    0, 0);
    507 				if (bp->b_oflags & BO_DELWRI) {
    508 					bwrite(bp);
    509 				} else {
    510 					brelse(bp, BC_INVAL);
    511 				}
    512 			} else {
    513 				brelse(bp, BC_INVAL);
    514 			}
    515 		}
    516 
    517 		/* Now flush all dependencies to disk. */
    518 #ifdef notyet
    519 		/* XXX pages locked */
    520 		(void)softdep_sync_metadata(vp);
    521 #endif
    522 
    523 		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
    524 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    525 			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    526 		}
    527 
    528 		/*
    529 		 * Now that any dependencies that we created have been
    530 		 * resolved, we can undo the partial allocation.
    531 		 */
    532 
    533 		if (unwindidx == 0) {
    534 			*allocib = 0;
    535 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    536 			if (DOINGSOFTDEP(vp))
    537 				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    538 		} else {
    539 			int r;
    540 
    541 			r = bread(vp, indirs[unwindidx].in_lbn,
    542 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
    543 			if (r) {
    544 				panic("Could not unwind indirect block, error %d", r);
    545 				brelse(bp, 0);
    546 			} else {
    547 				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
    548 				bap[indirs[unwindidx].in_off] = 0;
    549 				bwrite(bp);
    550 			}
    551 		}
    552 		for (i = unwindidx + 1; i <= num; i++) {
    553 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
    554 			    0);
    555 			brelse(bp, BC_INVAL);
    556 		}
    557 	}
    558 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
    559 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
    560 		deallocated += fs->fs_bsize;
    561 	}
    562 	if (deallocated) {
    563 #ifdef QUOTA
    564 		/*
    565 		 * Restore user's disk quota because allocation failed.
    566 		 */
    567 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
    568 #endif
    569 		ip->i_ffs1_blocks -= btodb(deallocated);
    570 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    571 	}
    572 	/*
    573 	 * Flush all dependencies again so that the soft updates code
    574 	 * doesn't find any untracked changes.
    575 	 */
    576 #ifdef notyet
    577 	/* XXX pages locked */
    578 	(void)softdep_sync_metadata(vp);
    579 #endif
    580 	return (error);
    581 }
    582 
    583 static int
    584 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    585     int flags, struct buf **bpp)
    586 {
    587 	daddr_t lbn, lastlbn;
    588 	struct buf *bp, *nbp;
    589 	struct inode *ip = VTOI(vp);
    590 	struct fs *fs = ip->i_fs;
    591 	struct ufsmount *ump = ip->i_ump;
    592 	struct indir indirs[NIADDR + 2];
    593 	daddr_t newb, pref, nb;
    594 	int64_t *bap;
    595 	int deallocated, osize, nsize, num, i, error;
    596 	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
    597 	int64_t *allocib;
    598 	int unwindidx = -1;
    599 #ifdef FFS_EI
    600 	const int needswap = UFS_FSNEEDSWAP(fs);
    601 #endif
    602 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    603 
    604 	lbn = lblkno(fs, off);
    605 	size = blkoff(fs, off) + size;
    606 	if (size > fs->fs_bsize)
    607 		panic("ffs_balloc: blk too big");
    608 	if (bpp != NULL) {
    609 		*bpp = NULL;
    610 	}
    611 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    612 
    613 	if (lbn < 0)
    614 		return (EFBIG);
    615 
    616 #ifdef notyet
    617 	/*
    618 	 * Check for allocating external data.
    619 	 */
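	/*
	 * Note: this disabled block is FreeBSD-derived code for allocating
	 * blocks of a UFS2 inode's external attributes area; it still uses
	 * FreeBSD names (dp, di_extb, BA_*, IO_*, b_xflags) and will not
	 * compile here as is.
	 */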
    620 	if (flags & IO_EXT) {
    621 		if (lbn >= NXADDR)
    622 			return (EFBIG);
    623 		/*
    624 		 * If the next write will extend the data into a new block,
     625 		 * and the data is currently composed of a fragment,
    626 		 * this fragment has to be extended to be a full block.
    627 		 */
    628 		lastlbn = lblkno(fs, dp->di_extsize);
    629 		if (lastlbn < lbn) {
    630 			nb = lastlbn;
    631 			osize = sblksize(fs, dp->di_extsize, nb);
    632 			if (osize < fs->fs_bsize && osize > 0) {
    633 				mutex_enter(&ump->um_lock);
    634 				error = ffs_realloccg(ip, -1 - nb,
    635 				    dp->di_extb[nb],
    636 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    637 				    &dp->di_extb[0]), osize,
    638 				    (int)fs->fs_bsize, cred, &bp);
    639 				if (error)
    640 					return (error);
    641 				if (DOINGSOFTDEP(vp))
    642 					softdep_setup_allocext(ip, nb,
    643 					    dbtofsb(fs, bp->b_blkno),
    644 					    dp->di_extb[nb],
    645 					    fs->fs_bsize, osize, bp);
    646 				dp->di_extsize = smalllblktosize(fs, nb + 1);
    647 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
    648 				bp->b_xflags |= BX_ALTDATA;
    649 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    650 				if (flags & IO_SYNC)
    651 					bwrite(bp);
    652 				else
    653 					bawrite(bp);
    654 			}
    655 		}
    656 		/*
    657 		 * All blocks are direct blocks
    658 		 */
    659 		if (flags & BA_METAONLY)
    660 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
    661 		nb = dp->di_extb[lbn];
    662 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
    663 			error = bread(vp, -1 - lbn, fs->fs_bsize,
    664 			    NOCRED, 0, &bp);
    665 			if (error) {
    666 				brelse(bp, 0);
    667 				return (error);
    668 			}
    669 			mutex_enter(&bp->b_interlock);
    670 			bp->b_blkno = fsbtodb(fs, nb);
    671 			bp->b_xflags |= BX_ALTDATA;
    672 			mutex_exit(&bp->b_interlock);
    673 			*bpp = bp;
    674 			return (0);
    675 		}
    676 		if (nb != 0) {
    677 			/*
    678 			 * Consider need to reallocate a fragment.
    679 			 */
    680 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
    681 			nsize = fragroundup(fs, size);
    682 			if (nsize <= osize) {
    683 				error = bread(vp, -1 - lbn, osize,
    684 				    NOCRED, 0, &bp);
    685 				if (error) {
    686 					brelse(bp, 0);
    687 					return (error);
    688 				}
    689 				mutex_enter(&bp->b_interlock);
    690 				bp->b_blkno = fsbtodb(fs, nb);
    691 				bp->b_xflags |= BX_ALTDATA;
    692 				mutex_exit(&bp->b_interlock);
    693 			} else {
    694 				mutex_enter(&ump->um_lock);
    695 				error = ffs_realloccg(ip, -1 - lbn,
    696 				    dp->di_extb[lbn],
    697 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    698 				    &dp->di_extb[0]), osize, nsize, cred, &bp);
    699 				if (error)
    700 					return (error);
    701 				bp->b_xflags |= BX_ALTDATA;
    702 				if (DOINGSOFTDEP(vp))
    703 					softdep_setup_allocext(ip, lbn,
    704 					    dbtofsb(fs, bp->b_blkno), nb,
    705 					    nsize, osize, bp);
    706 			}
    707 		} else {
    708 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
    709 				nsize = fragroundup(fs, size);
    710 			else
    711 				nsize = fs->fs_bsize;
    712 			mutex_enter(&ump->um_lock);
    713 			error = ffs_alloc(ip, lbn,
    714 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
    715 			   nsize, cred, &newb);
    716 			if (error)
    717 				return (error);
    718 			bp = getblk(vp, -1 - lbn, nsize, 0, 0);
    719 			bp->b_blkno = fsbtodb(fs, newb);
    720 			bp->b_xflags |= BX_ALTDATA;
    721 			if (flags & BA_CLRBUF)
    722 				vfs_bio_clrbuf(bp);
    723 			if (DOINGSOFTDEP(vp))
    724 				softdep_setup_allocext(ip, lbn, newb, 0,
    725 				    nsize, 0, bp);
    726 		}
    727 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
    728 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    729 		*bpp = bp;
    730 		return (0);
    731 	}
    732 #endif
    733 	/*
    734 	 * If the next write will extend the file into a new block,
     735 	 * and the file is currently composed of a fragment,
    736 	 * this fragment has to be extended to be a full block.
    737 	 */
    738 
    739 	lastlbn = lblkno(fs, ip->i_size);
    740 	if (lastlbn < NDADDR && lastlbn < lbn) {
    741 		nb = lastlbn;
    742 		osize = blksize(fs, ip, nb);
    743 		if (osize < fs->fs_bsize && osize > 0) {
    744 			mutex_enter(&ump->um_lock);
    745 			error = ffs_realloccg(ip, nb,
    746 				    ffs_blkpref_ufs2(ip, lastlbn, nb,
    747 					&ip->i_ffs2_db[0]),
    748 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    749 			if (error)
    750 				return (error);
    751 			if (DOINGSOFTDEP(vp))
    752 				softdep_setup_allocdirect(ip, nb, newb,
    753 				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
    754 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
    755 			ip->i_size = lblktosize(fs, nb + 1);
    756 			ip->i_ffs2_size = ip->i_size;
    757 			uvm_vnp_setsize(vp, ip->i_size);
    758 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    759 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    760 			if (bpp) {
    761 				if (flags & B_SYNC)
    762 					bwrite(*bpp);
    763 				else
    764 					bawrite(*bpp);
    765 			}
    766 		}
    767 	}
    768 
    769 	/*
    770 	 * The first NDADDR blocks are direct blocks
    771 	 */
    772 
    773 	if (lbn < NDADDR) {
    774 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    775 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    776 
    777 			/*
    778 			 * The block is an already-allocated direct block
    779 			 * and the file already extends past this block,
    780 			 * thus this must be a whole block.
    781 			 * Just read the block (if requested).
    782 			 */
    783 
    784 			if (bpp != NULL) {
    785 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    786 					      B_MODIFY, bpp);
    787 				if (error) {
    788 					brelse(*bpp, 0);
    789 					return (error);
    790 				}
    791 			}
    792 			return (0);
    793 		}
    794 		if (nb != 0) {
    795 
    796 			/*
    797 			 * Consider need to reallocate a fragment.
    798 			 */
    799 
    800 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    801 			nsize = fragroundup(fs, size);
    802 			if (nsize <= osize) {
    803 
    804 				/*
    805 				 * The existing block is already
    806 				 * at least as big as we want.
    807 				 * Just read the block (if requested).
    808 				 */
    809 
    810 				if (bpp != NULL) {
    811 					error = bread(vp, lbn, osize, NOCRED,
    812 						      B_MODIFY, bpp);
    813 					if (error) {
    814 						brelse(*bpp, 0);
    815 						return (error);
    816 					}
    817 				}
    818 				return 0;
    819 			} else {
    820 
    821 				/*
    822 				 * The existing block is smaller than we want,
    823 				 * grow it.
    824 				 */
    825 				mutex_enter(&ump->um_lock);
    826 				error = ffs_realloccg(ip, lbn,
    827 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    828 					&ip->i_ffs2_db[0]), osize, nsize, cred,
    829 					bpp, &newb);
    830 				if (error)
    831 					return (error);
    832 				if (DOINGSOFTDEP(vp))
    833 					softdep_setup_allocdirect(ip, lbn,
    834 					    newb, nb, nsize, osize,
    835 					    bpp ? *bpp : NULL);
    836 			}
    837 		} else {
    838 
    839 			/*
     840 			 * The block was not previously allocated;
    841 			 * allocate a new block or fragment.
    842 			 */
    843 
    844 			if (ip->i_size < lblktosize(fs, lbn + 1))
    845 				nsize = fragroundup(fs, size);
    846 			else
    847 				nsize = fs->fs_bsize;
    848 			mutex_enter(&ump->um_lock);
    849 			error = ffs_alloc(ip, lbn,
    850 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    851 				&ip->i_ffs2_db[0]), nsize, cred, &newb);
    852 			if (error)
    853 				return (error);
    854 			if (bpp != NULL) {
    855 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    856 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    857 				if (error)
    858 					return error;
    859 			}
    860 			if (DOINGSOFTDEP(vp)) {
    861 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    862 				    nsize, 0, bpp ? *bpp : NULL);
    863 			}
    864 		}
    865 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    866 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    867 		return (0);
    868 	}
    869 
    870 	/*
    871 	 * Determine the number of levels of indirection.
    872 	 */
    873 
    874 	pref = 0;
    875 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    876 		return (error);
    877 
    878 	/*
     879 	 * Fetch the first indirect block, allocating if necessary.
    880 	 */
    881 
    882 	--num;
    883 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    884 	allocib = NULL;
    885 	allocblk = allociblk;
    886 	if (nb == 0) {
    887 		mutex_enter(&ump->um_lock);
    888 		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
    889 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    890 		    &newb);
    891 		if (error)
    892 			goto fail;
    893 		nb = newb;
    894 		*allocblk++ = nb;
    895 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    896 		    fs->fs_bsize, true, &bp);
    897 		if (error)
    898 			goto fail;
    899 		if (DOINGSOFTDEP(vp)) {
    900 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    901 			    newb, 0, fs->fs_bsize, 0, bp);
    902 			bdwrite(bp);
    903 		} else {
    904 
    905 			/*
    906 			 * Write synchronously so that indirect blocks
    907 			 * never point at garbage.
    908 			 */
    909 
    910 			if ((error = bwrite(bp)) != 0)
    911 				goto fail;
    912 		}
    913 		unwindidx = 0;
    914 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    915 		*allocib = ufs_rw64(nb, needswap);
    916 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    917 	}
    918 
    919 	/*
    920 	 * Fetch through the indirect blocks, allocating as necessary.
    921 	 */
    922 
    923 	for (i = 1;;) {
    924 		error = bread(vp,
    925 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    926 		if (error) {
    927 			brelse(bp, 0);
    928 			goto fail;
    929 		}
    930 		bap = (int64_t *)bp->b_data;
    931 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    932 		if (i == num)
    933 			break;
    934 		i++;
    935 		if (nb != 0) {
    936 			brelse(bp, 0);
    937 			continue;
    938 		}
    939 		if (fscow_run(bp, true) != 0) {
    940 			brelse(bp, 0);
    941 			goto fail;
    942 		}
    943 		mutex_enter(&ump->um_lock);
    944 		if (pref == 0)
    945 			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
    946 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    947 		    &newb);
    948 		if (error) {
    949 			brelse(bp, 0);
    950 			goto fail;
    951 		}
    952 		nb = newb;
    953 		*allocblk++ = nb;
    954 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    955 		    fs->fs_bsize, true, &nbp);
    956 		if (error) {
    957 			brelse(bp, 0);
    958 			goto fail;
    959 		}
    960 		if (DOINGSOFTDEP(vp)) {
    961 			softdep_setup_allocindir_meta(nbp, ip, bp,
    962 			    indirs[i - 1].in_off, nb);
    963 			bdwrite(nbp);
    964 		} else {
    965 
    966 			/*
    967 			 * Write synchronously so that indirect blocks
    968 			 * never point at garbage.
    969 			 */
    970 
    971 			if ((error = bwrite(nbp)) != 0) {
    972 				brelse(bp, 0);
    973 				goto fail;
    974 			}
    975 		}
    976 		if (unwindidx < 0)
    977 			unwindidx = i - 1;
    978 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    979 
    980 		/*
    981 		 * If required, write synchronously, otherwise use
    982 		 * delayed write.
    983 		 */
    984 
    985 		if (flags & B_SYNC) {
    986 			bwrite(bp);
    987 		} else {
    988 			bdwrite(bp);
    989 		}
    990 	}
    991 
    992 	if (flags & B_METAONLY) {
    993 		KASSERT(bpp != NULL);
    994 		*bpp = bp;
    995 		return (0);
    996 	}
    997 
    998 	/*
    999 	 * Get the data block, allocating if necessary.
   1000 	 */
   1001 
   1002 	if (nb == 0) {
   1003 		if (fscow_run(bp, true) != 0) {
   1004 			brelse(bp, 0);
   1005 			goto fail;
   1006 		}
   1007 		mutex_enter(&ump->um_lock);
   1008 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
   1009 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
   1010 		    &newb);
   1011 		if (error) {
   1012 			brelse(bp, 0);
   1013 			goto fail;
   1014 		}
   1015 		nb = newb;
   1016 		*allocblk++ = nb;
   1017 		if (bpp != NULL) {
   1018 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
   1019 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
   1020 			if (error) {
   1021 				brelse(bp, 0);
   1022 				goto fail;
   1023 			}
   1024 		}
   1025 		if (DOINGSOFTDEP(vp))
   1026 			softdep_setup_allocindir_page(ip, lbn, bp,
   1027 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
   1028 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
   1029 		if (allocib == NULL && unwindidx < 0) {
   1030 			unwindidx = i - 1;
   1031 		}
   1032 
   1033 		/*
   1034 		 * If required, write synchronously, otherwise use
   1035 		 * delayed write.
   1036 		 */
   1037 
   1038 		if (flags & B_SYNC) {
   1039 			bwrite(bp);
   1040 		} else {
   1041 			bdwrite(bp);
   1042 		}
   1043 		return (0);
   1044 	}
   1045 	brelse(bp, 0);
   1046 	if (bpp != NULL) {
   1047 		if (flags & B_CLRBUF) {
   1048 			error = bread(vp, lbn, (int)fs->fs_bsize,
   1049 			    NOCRED, B_MODIFY, &nbp);
   1050 			if (error) {
   1051 				brelse(nbp, 0);
   1052 				goto fail;
   1053 			}
   1054 		} else {
   1055 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
   1056 			    fs->fs_bsize, true, &nbp);
   1057 			if (error)
   1058 				goto fail;
   1059 		}
   1060 		*bpp = nbp;
   1061 	}
   1062 	return (0);
   1063 
   1064 fail:
   1065 	/*
   1066 	 * If we have failed part way through block allocation, we
   1067 	 * have to deallocate any indirect blocks that we have allocated.
   1068 	 */
   1069 
   1070 	if (unwindidx >= 0) {
   1071 
   1072 		/*
   1073 		 * First write out any buffers we've created to resolve their
   1074 		 * softdeps.  This must be done in reverse order of creation
   1075 		 * so that we resolve the dependencies in one pass.
   1076 		 * Write the cylinder group buffers for these buffers too.
   1077 		 */
   1078 
   1079 		for (i = num; i >= unwindidx; i--) {
   1080 			if (i == 0) {
   1081 				break;
   1082 			}
   1083 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
   1084 			    0);
   1085 			if (bp->b_oflags & BO_DELWRI) {
   1086 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
   1087 				    dbtofsb(fs, bp->b_blkno))));
   1088 				bwrite(bp);
   1089 				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
   1090 				    0, 0);
   1091 				if (bp->b_oflags & BO_DELWRI) {
   1092 					bwrite(bp);
   1093 				} else {
   1094 					brelse(bp, BC_INVAL);
   1095 				}
   1096 			} else {
   1097 				brelse(bp, BC_INVAL);
   1098 			}
   1099 		}
   1100 
   1101 		/* Now flush the dependencies to disk. */
   1102 #ifdef notyet
   1103 		/* XXX pages locked */
   1104 		(void)softdep_sync_metadata(vp);
   1105 #endif
   1106 
   1107 		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
   1108 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1109 			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
   1110 		}
   1111 
   1112 		/*
   1113 		 * Now that any dependencies that we created have been
   1114 		 * resolved, we can undo the partial allocation.
   1115 		 */
   1116 
   1117 		if (unwindidx == 0) {
   1118 			*allocib = 0;
   1119 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1120 			if (DOINGSOFTDEP(vp))
   1121 				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
   1122 		} else {
   1123 			int r;
   1124 
   1125 			r = bread(vp, indirs[unwindidx].in_lbn,
   1126 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
   1127 			if (r) {
   1128 				panic("Could not unwind indirect block, error %d", r);
   1129 				brelse(bp, 0);
   1130 			} else {
   1131 				bap = (int64_t *)bp->b_data;
   1132 				bap[indirs[unwindidx].in_off] = 0;
   1133 				bwrite(bp);
   1134 			}
   1135 		}
   1136 		for (i = unwindidx + 1; i <= num; i++) {
   1137 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
   1138 			    0);
   1139 			brelse(bp, BC_INVAL);
   1140 		}
   1141 	}
   1142 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1143 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1144 		deallocated += fs->fs_bsize;
   1145 	}
   1146 	if (deallocated) {
   1147 #ifdef QUOTA
   1148 		/*
   1149 		 * Restore user's disk quota because allocation failed.
   1150 		 */
   1151 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1152 #endif
   1153 		ip->i_ffs2_blocks -= btodb(deallocated);
   1154 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1155 	}
   1156 
   1157 	/*
   1158 	 * Flush all dependencies again so that the soft updates code
   1159 	 * doesn't find any untracked changes.
   1160 	 */
   1161 #ifdef notyet
   1162 	/* XXX pages locked */
   1163 	(void)softdep_sync_metadata(vp);
   1164 #endif
   1165 	return (error);
   1166 }
   1167