Home | History | Annotate | Line # | Download | only in ffs
ffs_balloc.c revision 1.31
      1 /*	$NetBSD: ffs_balloc.c,v 1.31 2003/01/24 21:55:21 fvdl Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     36  */
     37 
     38 #include <sys/cdefs.h>
     39 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.31 2003/01/24 21:55:21 fvdl Exp $");
     40 
     41 #if defined(_KERNEL_OPT)
     42 #include "opt_quota.h"
     43 #endif
     44 
     45 #include <sys/param.h>
     46 #include <sys/systm.h>
     47 #include <sys/buf.h>
     48 #include <sys/file.h>
     49 #include <sys/mount.h>
     50 #include <sys/vnode.h>
     51 #include <sys/mount.h>
     52 
     53 #include <ufs/ufs/quota.h>
     54 #include <ufs/ufs/ufsmount.h>
     55 #include <ufs/ufs/inode.h>
     56 #include <ufs/ufs/ufs_extern.h>
     57 #include <ufs/ufs/ufs_bswap.h>
     58 
     59 #include <ufs/ffs/fs.h>
     60 #include <ufs/ffs/ffs_extern.h>
     61 
     62 #include <uvm/uvm.h>
     63 
     64 /*
     65  * Balloc defines the structure of file system storage
     66  * by allocating the physical blocks on a device given
     67  * the inode and the logical block number in a file.
         *
         * The argument is a struct vop_balloc_args (its fields are listed in
         * the function body).  The logical block is computed from
         * a_startoffset, and the in-block offset plus a_size must not exceed
         * fs_bsize; the function panics if it does.
         *
         * When a_bpp is non-NULL, *a_bpp is cleared on entry and the
         * successful return paths store a buffer for the requested block in
         * it (in the fragment-growing case this is left to ffs_realloccg(),
         * which is handed a_bpp).  When a_bpp is NULL only the block
         * pointers are set up.
         *
         * Returns 0 on success, EFBIG for a negative logical block number,
         * or the error reported by the allocation or buffer routines.  If
         * the indirect-block path fails part way, the blocks allocated by
         * this call are released again at the "fail" label.
     68  */
     69 int
     70 ffs_balloc(v)
     71 	void *v;
     72 {
     73 	struct vop_balloc_args /* {
     74 		struct vnode *a_vp;
     75 		off_t a_startoffset;
     76 		int a_size;
     77 		struct ucred *a_cred;
     78 		int a_flags;
     79 		struct buf **a_bpp;
     80 	} */ *ap = v;
     81 	daddr_t lbn;
     82 	int size;
     83 	struct ucred *cred;
     84 	int flags;
     85 	daddr_t nb;
     86 	struct buf *bp, *nbp;
     87 	struct vnode *vp = ap->a_vp;
     88 	struct inode *ip = VTOI(vp);
     89 	struct fs *fs = ip->i_fs;
     90 	struct indir indirs[NIADDR + 2];
     91 	daddr_t newb, pref;
     92 	int32_t *bap;	/* XXX ondisk32 */
     93 	int deallocated, osize, nsize, num, i, error;
     94 	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
     95 	int32_t *allocib;	/* XXX ondisk32 */
     96 	int unwindidx = -1;
     97 	struct buf **bpp = ap->a_bpp;
     98 #ifdef FFS_EI
     99 	const int needswap = UFS_FSNEEDSWAP(fs);
    100 #endif
    101 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    102 
        	/*
        	 * lbn is the logical block holding a_startoffset; size is the
        	 * byte count from the start of that block to the end of the
        	 * requested range, so it may not exceed one block.
        	 */
    103 	lbn = lblkno(fs, ap->a_startoffset);
    104 	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
    105 	if (size > fs->fs_bsize)
    106 		panic("ffs_balloc: blk too big");
    107 	if (bpp != NULL) {
    108 		*bpp = NULL;
    109 	}
    110 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    111 
    112 	KASSERT(size <= fs->fs_bsize);
    113 	if (lbn < 0)
    114 		return (EFBIG);
    115 	cred = ap->a_cred;
    116 	flags = ap->a_flags;
    117 
    118 	/*
    119 	 * If the next write will extend the file into a new block,
    120 	 * and the file is currently composed of a fragment
    121 	 * this fragment has to be extended to be a full block.
    122 	 */
    123 
        	/* nb is the logical block that contains the current end of file. */
    124 	nb = lblkno(fs, ip->i_ffs_size);
    125 	if (nb < NDADDR && nb < lbn) {
    126 		osize = blksize(fs, ip, nb);
        		/* Only a partial last block (0 < osize < fs_bsize) is grown. */
    127 		if (osize < fs->fs_bsize && osize > 0) {
    128 			error = ffs_realloccg(ip, nb,
    129 				ffs_blkpref(ip, nb, (int)nb, &ip->i_ffs_db[0]),
    130 				osize, (int)fs->fs_bsize, cred, bpp, &newb);
    131 			if (error)
    132 				return (error);
    133 			if (DOINGSOFTDEP(vp))
    134 				softdep_setup_allocdirect(ip, nb, newb,
    135 				    ufs_rw32(ip->i_ffs_db[nb], needswap),
    136 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
        			/*
        			 * The file now ends at the end of block nb; pass the
        			 * new size on to UVM as well.
        			 */
    137 			ip->i_ffs_size = lblktosize(fs, nb + 1);
    138 			uvm_vnp_setsize(vp, ip->i_ffs_size);
    139 			/* XXX ondisk32 */
    140 			ip->i_ffs_db[nb] = ufs_rw32((int32_t)newb, needswap);
    141 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
        			/*
        			 * Write the grown block out now, synchronously if
        			 * B_SYNC was requested.
        			 * NOTE(review): *bpp is not reset here; it keeps
        			 * referring to the buffer handed to bwrite()/bawrite()
        			 * until a later path stores the buffer for lbn in it.
        			 */
    142 			if (bpp) {
    143 				if (flags & B_SYNC)
    144 					bwrite(*bpp);
    145 				else
    146 					bawrite(*bpp);
    147 			}
    148 		}
    149 	}
    150 
    151 	/*
    152 	 * The first NDADDR blocks are direct blocks
    153 	 */
    154 
    155 	if (lbn < NDADDR) {
    156 		/* XXX ondisk32 */
    157 		nb = ufs_rw32(ip->i_ffs_db[lbn], needswap);
    158 		if (nb != 0 && ip->i_ffs_size >= lblktosize(fs, lbn + 1)) {
    159 
    160 			/*
    161 			 * The block is an already-allocated direct block
    162 			 * and the file already extends past this block,
    163 			 * thus this must be a whole block.
    164 			 * Just read the block (if requested).
    165 			 */
    166 
    167 			if (bpp != NULL) {
    168 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    169 					      bpp);
    170 				if (error) {
    171 					brelse(*bpp);
    172 					return (error);
    173 				}
    174 			}
    175 			return (0);
    176 		}
    177 		if (nb != 0) {
    178 
    179 			/*
    180 			 * Consider need to reallocate a fragment.
    181 			 */
    182 
        			/*
        			 * osize: current in-block extent of the file, rounded up
        			 * to whole fragments; nsize: extent needed by this
        			 * request, rounded the same way.
        			 */
    183 			osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
    184 			nsize = fragroundup(fs, size);
    185 			if (nsize <= osize) {
    186 
    187 				/*
    188 				 * The existing block is already
    189 				 * at least as big as we want.
    190 				 * Just read the block (if requested).
    191 				 */
    192 
    193 				if (bpp != NULL) {
    194 					error = bread(vp, lbn, osize, NOCRED,
    195 						      bpp);
    196 					if (error) {
    197 						brelse(*bpp);
    198 						return (error);
    199 					}
    200 				}
    201 				return 0;
    202 			} else {
    203 
    204 				/*
    205 				 * The existing block is smaller than we want,
    206 				 * grow it.
    207 				 */
    208 
    209 				error = ffs_realloccg(ip, lbn,
    210 				    ffs_blkpref(ip, lbn, (int)lbn,
    211 					&ip->i_ffs_db[0]), osize, nsize, cred,
    212 					bpp, &newb);
    213 				if (error)
    214 					return (error);
    215 				if (DOINGSOFTDEP(vp))
    216 					softdep_setup_allocdirect(ip, lbn,
    217 					    newb, nb, nsize, osize,
    218 					    bpp ? *bpp : NULL);
    219 			}
    220 		} else {
    221 
    222 			/*
    223 			 * the block was not previously allocated,
    224 			 * allocate a new block or fragment.
    225 			 */
    226 
        			/*
        			 * A block at or beyond the current end of file may be
        			 * a fragment; one inside the file gets a full block.
        			 */
    227 			if (ip->i_ffs_size < lblktosize(fs, lbn + 1))
    228 				nsize = fragroundup(fs, size);
    229 			else
    230 				nsize = fs->fs_bsize;
    231 			error = ffs_alloc(ip, lbn,
    232 			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
    233 				nsize, cred, &newb);
    234 			if (error)
    235 				return (error);
        			/*
        			 * Hand back a buffer mapped to the new disk address;
        			 * it is zeroed only when B_CLRBUF is set.
        			 */
    236 			if (bpp != NULL) {
    237 				bp = getblk(vp, lbn, nsize, 0, 0);
    238 				bp->b_blkno = fsbtodb(fs, newb);
    239 				if (flags & B_CLRBUF)
    240 					clrbuf(bp);
    241 				*bpp = bp;
    242 			}
    243 			if (DOINGSOFTDEP(vp)) {
    244 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    245 				    nsize, 0, bpp ? *bpp : NULL);
    246 			}
    247 		}
        		/*
        		 * Shared tail of the grow and fresh-allocation cases: store
        		 * the new address in the inode's direct block array.
        		 */
    248 		/* XXX ondisk32 */
    249 		ip->i_ffs_db[lbn] = ufs_rw32((int32_t)newb, needswap);
    250 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    251 		return (0);
    252 	}
    253 
    254 	/*
    255 	 * Determine the number of levels of indirection.
    256 	 */
    257 
    258 	pref = 0;
    259 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    260 		return (error);
    261 
    262 	/*
    263 	 * Fetch the first indirect block allocating if necessary.
    264 	 */
    265 
        	/*
        	 * After the decrement, indirs[num] is the entry whose slot holds
        	 * the data block address (see the loop exit and the code after
        	 * it).  indirs[0].in_off indexes the inode's i_ffs_ib[] array.
        	 *
        	 * allocblk walks allociblk[], recording every block allocated by
        	 * this call so that "fail" can free them; allocib remembers the
        	 * inode slot filled in here, if any.
        	 */
    266 	--num;
    267 	nb = ufs_rw32(ip->i_ffs_ib[indirs[0].in_off], needswap);
    268 	allocib = NULL;
    269 	allocblk = allociblk;
    270 	if (nb == 0) {
    271 		pref = ffs_blkpref(ip, lbn, 0, (int32_t *)0);
    272 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    273 		    &newb);
    274 		if (error)
    275 			goto fail;
    276 		nb = newb;
    277 		*allocblk++ = nb;
    278 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
    279 		bp->b_blkno = fsbtodb(fs, nb);
    280 		clrbuf(bp);
    281 		if (DOINGSOFTDEP(vp)) {
    282 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    283 			    newb, 0, fs->fs_bsize, 0, bp);
    284 			bdwrite(bp);
    285 		} else {
    286 
    287 			/*
    288 			 * Write synchronously so that indirect blocks
    289 			 * never point at garbage.
    290 			 */
    291 
    292 			if ((error = bwrite(bp)) != 0)
    293 				goto fail;
    294 		}
        		/*
        		 * unwindidx == 0 tells "fail" that the pointer to undo is
        		 * the inode slot remembered in allocib.
        		 */
    295 		unwindidx = 0;
    296 		allocib = &ip->i_ffs_ib[indirs[0].in_off];
    297 		/* XXX ondisk32 */
    298 		*allocib = ufs_rw32((int32_t)nb, needswap);
    299 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    300 	}
    301 
    302 	/*
    303 	 * Fetch through the indirect blocks, allocating as necessary.
    304 	 */
    305 
    306 	for (i = 1;;) {
    307 		error = bread(vp,
    308 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
    309 		if (error) {
    310 			brelse(bp);
    311 			goto fail;
    312 		}
    313 		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
    314 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
        		/*
        		 * At the last level nb is the data block address; leave
        		 * the loop with bp and bap still referring to that
        		 * indirect block.
        		 */
    315 		if (i == num)
    316 			break;
        		/*
        		 * From here on indirs[i] describes the next-level indirect
        		 * block and indirs[i - 1] the block just read into bp.
        		 */
    317 		i++;
    318 		if (nb != 0) {
    319 			brelse(bp);
    320 			continue;
    321 		}
    322 		if (pref == 0)
    323 			pref = ffs_blkpref(ip, lbn, 0, (int32_t *)0);
    324 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    325 		    &newb);
    326 		if (error) {
    327 			brelse(bp);
    328 			goto fail;
    329 		}
    330 		nb = newb;
    331 		*allocblk++ = nb;
    332 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
    333 		nbp->b_blkno = fsbtodb(fs, nb);
    334 		clrbuf(nbp);
    335 		if (DOINGSOFTDEP(vp)) {
    336 			softdep_setup_allocindir_meta(nbp, ip, bp,
    337 			    indirs[i - 1].in_off, nb);
    338 			bdwrite(nbp);
    339 		} else {
    340 
    341 			/*
    342 			 * Write synchronously so that indirect blocks
    343 			 * never point at garbage.
    344 			 */
    345 
    346 			if ((error = bwrite(nbp)) != 0) {
    347 				brelse(bp);
    348 				goto fail;
    349 			}
    350 		}
        		/*
        		 * Remember the first level at which this call stored a new
        		 * pointer, unless an earlier one was already recorded.
        		 */
    351 		if (unwindidx < 0)
    352 			unwindidx = i - 1;
    353 		/* XXX ondisk32 */
    354 		bap[indirs[i - 1].in_off] = ufs_rw32((int32_t)nb, needswap);
    355 
    356 		/*
    357 		 * If required, write synchronously, otherwise use
    358 		 * delayed write.
    359 		 */
    360 
    361 		if (flags & B_SYNC) {
    362 			bwrite(bp);
    363 		} else {
    364 			bdwrite(bp);
    365 		}
    366 	}
    367 
    368 	/*
    369 	 * Get the data block, allocating if necessary.
    370 	 */
    371 
    372 	if (nb == 0) {
    373 		pref = ffs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
    374 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    375 		    &newb);
    376 		if (error) {
    377 			brelse(bp);
    378 			goto fail;
    379 		}
    380 		nb = newb;
    381 		*allocblk++ = nb;
        		/*
        		 * Buffer for the new data block, zeroed only when
        		 * B_CLRBUF is set.
        		 */
    382 		if (bpp != NULL) {
    383 			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
    384 			nbp->b_blkno = fsbtodb(fs, nb);
    385 			if (flags & B_CLRBUF)
    386 				clrbuf(nbp);
    387 			*bpp = nbp;
    388 		}
    389 		if (DOINGSOFTDEP(vp))
    390 			softdep_setup_allocindir_page(ip, lbn, bp,
    391 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
    392 		/* XXX ondisk32 */
    393 		bap[indirs[num].in_off] = ufs_rw32((int32_t)nb, needswap);
        		/*
        		 * NOTE(review): this branch returns 0 below without any
        		 * further jump to "fail", so the value stored in unwindidx
        		 * here does not appear to be consumed.
        		 */
    394 		if (allocib == NULL && unwindidx < 0) {
    395 			unwindidx = i - 1;
    396 		}
    397 
    398 		/*
    399 		 * If required, write synchronously, otherwise use
    400 		 * delayed write.
    401 		 */
    402 
    403 		if (flags & B_SYNC) {
    404 			bwrite(bp);
    405 		} else {
    406 			bdwrite(bp);
    407 		}
    408 		return (0);
    409 	}
        	/*
        	 * The data block already exists, so the last indirect block was
        	 * not modified and its buffer can simply be released.
        	 */
    410 	brelse(bp);
        	/*
        	 * With B_CLRBUF the existing block is read in; without it the
        	 * buffer is obtained without a read, pointed at the block and
        	 * zeroed.
        	 */
    411 	if (bpp != NULL) {
    412 		if (flags & B_CLRBUF) {
    413 			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
    414 			if (error) {
    415 				brelse(nbp);
    416 				goto fail;
    417 			}
    418 		} else {
    419 			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
    420 			nbp->b_blkno = fsbtodb(fs, nb);
    421 			clrbuf(nbp);
    422 		}
    423 		*bpp = nbp;
    424 	}
    425 	return (0);
    426 
    427 fail:
    428 	/*
    429 	 * If we have failed part way through block allocation, we
    430 	 * have to deallocate any indirect blocks that we have allocated.
    431 	 */
    432 
    433 	if (unwindidx >= 0) {
    434 
    435 		/*
    436 		 * First write out any buffers we've created to resolve their
    437 		 * softdeps.  This must be done in reverse order of creation
    438 		 * so that we resolve the dependencies in one pass.
    439 		 * Write the cylinder group buffers for these buffers too.
    440 		 */
    441 
        		/*
        		 * Level 0 is skipped: indirs[0] is used above only to index
        		 * the inode's i_ffs_ib[] array, never to fetch a buffer.
        		 */
    442 		for (i = num; i >= unwindidx; i--) {
    443 			if (i == 0) {
    444 				break;
    445 			}
    446 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
    447 			    0);
    448 			if (bp->b_flags & B_DELWRI) {
        				/*
        				 * Compute the device address of the cylinder
        				 * group block covering this buffer before
        				 * bwrite() gives the buffer away.
        				 */
    449 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    450 				    dbtofsb(fs, bp->b_blkno))));
    451 				bwrite(bp);
    452 				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
    453 				    0, 0);
    454 				if (bp->b_flags & B_DELWRI) {
    455 					bwrite(bp);
    456 				} else {
    457 					bp->b_flags |= B_INVAL;
    458 					brelse(bp);
    459 				}
    460 			} else {
    461 				bp->b_flags |= B_INVAL;
    462 				brelse(bp);
    463 			}
    464 		}
        		/*
        		 * NOTE(review): for unwindidx == 0 the inode is updated
        		 * synchronously here and again just below, after the
        		 * pointer has been cleared.
        		 */
    465 		if (unwindidx == 0) {
    466 			ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
    467 			VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
    468 		}
    469 
    470 		/*
    471 		 * Now that any dependencies that we created have been
    472 		 * resolved, we can undo the partial allocation.
    473 		 */
    474 
    475 		if (unwindidx == 0) {
    476 			*allocib = 0;
    477 			ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
    478 			VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
    479 		} else {
    480 			int r;
    481 
        			/*
        			 * Clear the pointer stored in the indirect block at
        			 * the unwind level and write that block back.
        			 */
    482 			r = bread(vp, indirs[unwindidx].in_lbn,
    483 			    (int)fs->fs_bsize, NOCRED, &bp);
    484 			if (r) {
        				/*
        				 * NOTE(review): the brelse() below runs only
        				 * if panic() returns.
        				 */
    485 				panic("Could not unwind indirect block, error %d", r);
    486 				brelse(bp);
    487 			} else {
    488 				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
    489 				bap[indirs[unwindidx].in_off] = 0;
    490 				bwrite(bp);
    491 			}
    492 		}
        		/*
        		 * Invalidate the cached buffers of every indirect level
        		 * below the unwind point.
        		 */
    493 		for (i = unwindidx + 1; i <= num; i++) {
    494 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
    495 			    0);
    496 			bp->b_flags |= B_INVAL;
    497 			brelse(bp);
    498 		}
    499 	}
        	/*
        	 * Free every block recorded in allociblk[] by this call and add
        	 * up the number of bytes given back.
        	 */
    500 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
    501 		ffs_blkfree(ip, *blkp, fs->fs_bsize);
    502 		deallocated += fs->fs_bsize;
    503 	}
    504 	if (deallocated) {
    505 #ifdef QUOTA
    506 		/*
    507 		 * Restore user's disk quota because allocation failed.
    508 		 */
    509 		(void)chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
    510 #endif
        		/* Take the freed blocks back out of the inode's block count. */
    511 		ip->i_ffs_blocks -= btodb(deallocated);
    512 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    513 	}
    514 	return (error);
    515 }
    516 
    517 
    518 int
    519 ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
    520     struct ucred *cred)
    521 {
    522 	struct inode *ip = VTOI(vp);
    523 	struct fs *fs = ip->i_fs;
    524 	int error, delta, bshift, bsize;
    525 	UVMHIST_FUNC("ffs_gop_alloc"); UVMHIST_CALLED(ubchist);
    526 
    527 	error = 0;
    528 	bshift = fs->fs_bshift;
    529 	bsize = 1 << bshift;
    530 
    531 	delta = off & (bsize - 1);
    532 	off -= delta;
    533 	len += delta;
    534 
    535 	while (len > 0) {
    536 		bsize = MIN(bsize, len);
    537 
    538 		error = VOP_BALLOC(vp, off, bsize, cred, flags, NULL);
    539 		if (error) {
    540 			goto out;
    541 		}
    542 
    543 		/*
    544 		 * increase file size now, VOP_BALLOC() requires that
    545 		 * EOF be up-to-date before each call.
    546 		 */
    547 
    548 		if (ip->i_ffs_size < off + bsize) {
    549 			UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
    550 			    vp, ip->i_ffs_size, off + bsize, 0);
    551 			ip->i_ffs_size = off + bsize;
    552 		}
    553 
    554 		off += bsize;
    555 		len -= bsize;
    556 	}
    557 
    558 out:
    559 	return error;
    560 }
    561