Home | History | Annotate | Line # | Download | only in lfs
lfs_inode.c revision 1.33
      1 /*	$NetBSD: lfs_inode.c,v 1.33 2000/04/23 21:10:27 perseant Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *      This product includes software developed by the NetBSD
     21  *      Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 /*
     39  * Copyright (c) 1986, 1989, 1991, 1993
     40  *	The Regents of the University of California.  All rights reserved.
     41  *
     42  * Redistribution and use in source and binary forms, with or without
     43  * modification, are permitted provided that the following conditions
     44  * are met:
     45  * 1. Redistributions of source code must retain the above copyright
     46  *    notice, this list of conditions and the following disclaimer.
     47  * 2. Redistributions in binary form must reproduce the above copyright
     48  *    notice, this list of conditions and the following disclaimer in the
     49  *    documentation and/or other materials provided with the distribution.
     50  * 3. All advertising materials mentioning features or use of this software
     51  *    must display the following acknowledgement:
     52  *	This product includes software developed by the University of
     53  *	California, Berkeley and its contributors.
     54  * 4. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  *
     70  *	@(#)lfs_inode.c	8.9 (Berkeley) 5/8/95
     71  */
     72 
     73 #if defined(_KERNEL) && !defined(_LKM)
     74 #include "opt_quota.h"
     75 #endif
     76 
     77 #include <sys/param.h>
     78 #include <sys/systm.h>
     79 #include <sys/mount.h>
     80 #include <sys/proc.h>
     81 #include <sys/file.h>
     82 #include <sys/buf.h>
     83 #include <sys/vnode.h>
     84 #include <sys/kernel.h>
     85 #include <sys/malloc.h>
     86 
     87 #include <vm/vm.h>
     88 
     89 #include <ufs/ufs/quota.h>
     90 #include <ufs/ufs/inode.h>
     91 #include <ufs/ufs/ufsmount.h>
     92 #include <ufs/ufs/ufs_extern.h>
     93 
     94 #include <ufs/lfs/lfs.h>
     95 #include <ufs/lfs/lfs_extern.h>
     96 
     97 static int lfs_vinvalbuf __P((struct vnode *, struct ucred *, struct proc *, ufs_daddr_t));
     98 
     99 /* Search a block for a specific dinode. */
    100 struct dinode *
    101 lfs_ifind(fs, ino, bp)
    102 	struct lfs *fs;
    103 	ino_t ino;
    104 	struct buf *bp;
    105 {
    106 	int cnt;
    107 	struct dinode *dip = (struct dinode *)bp->b_data;
    108 	struct dinode *ldip;
    109 
    110 	for (cnt = INOPB(fs), ldip = dip + (cnt - 1); cnt--; --ldip)
    111 		if (ldip->di_inumber == ino)
    112 			return (ldip);
    113 
    114 	printf("offset is %d (seg %d)\n", fs->lfs_offset, datosn(fs,fs->lfs_offset));
    115 	printf("block is %d (seg %d)\n", bp->b_blkno, datosn(fs,bp->b_blkno));
    116 	panic("lfs_ifind: dinode %u not found", ino);
    117 	/* NOTREACHED */
    118 }
    119 
    120 int
    121 lfs_update(v)
    122 	void *v;
    123 {
    124 	struct vop_update_args /* {
    125 				  struct vnode *a_vp;
    126 				  struct timespec *a_access;
    127 				  struct timespec *a_modify;
    128 				  int a_waitfor;
    129 				  } */ *ap = v;
    130 	struct inode *ip;
    131 	struct vnode *vp = ap->a_vp;
    132 	int mod, oflag;
    133 	struct timespec ts;
    134 	struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs;
    135 
    136 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
    137 		return (0);
    138 	ip = VTOI(vp);
    139 
    140 	/*
    141 	 * If we are called from vinvalbuf, and the file's blocks have
    142 	 * already been scheduled for writing, but the writes have not
    143 	 * yet completed, lfs_vflush will not be called, and vinvalbuf
    144 	 * will cause a panic.  So, we must wait until any pending write
    145 	 * for our inode completes, if we are called with LFS_SYNC set.
    146 	 */
    147 	while((ap->a_waitfor & LFS_SYNC) && WRITEINPROG(vp)) {
    148 #ifdef DEBUG_LFS
    149 		printf("lfs_update: sleeping on inode %d (in-progress)\n",ip->i_number);
    150 #endif
    151 		tsleep(vp, (PRIBIO+1), "lfs_update", 0);
    152 	}
    153 	mod = ip->i_flag & IN_MODIFIED;
    154 	oflag = ip->i_flag;
    155 	TIMEVAL_TO_TIMESPEC(&time, &ts);
    156 	LFS_ITIMES(ip,
    157 		   ap->a_access ? ap->a_access : &ts,
    158 		   ap->a_modify ? ap->a_modify : &ts, &ts);
    159 	if (!mod && (ip->i_flag & IN_MODIFIED))
    160 		ip->i_lfs->lfs_uinodes++;
    161 	if ((ip->i_flag & (IN_MODIFIED|IN_CLEANING)) == 0) {
    162 		return (0);
    163 	}
    164 
    165 	/* If sync, push back the vnode and any dirty blocks it may have. */
    166 	if(ap->a_waitfor & LFS_SYNC) {
    167 		/* Avoid flushing VDIROP. */
    168 		++fs->lfs_diropwait;
    169 		while(vp->v_flag & VDIROP) {
    170 #ifdef DEBUG_LFS
    171 			printf("lfs_update: sleeping on inode %d (dirops)\n",ip->i_number);
    172 #endif
    173 			if(fs->lfs_dirops==0)
    174 				lfs_flush_fs(vp->v_mount,SEGM_SYNC);
    175 			else
    176 				tsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 0);
    177 			/* XXX KS - by falling out here, are we writing the vn
    178 			twice? */
    179 		}
    180 		--fs->lfs_diropwait;
    181 		return lfs_vflush(vp);
    182         }
    183 	return 0;
    184 }
    185 
/*
 * Update segment usage information when removing a block.
 *
 * Charges the `num' bytes accumulated so far against segment `lastseg'
 * (a no-op while lastseg is still -1) and adds the corresponding number
 * of fragments to `fragsreleased'.  A segment that would go negative is
 * reported and causes a panic.
 *
 * The macro operates on the caller's locals: fs, sup, sup_bp, lastseg,
 * num, e1 and fragsreleased.  It is wrapped in do { } while (0) so that
 * "UPDATE_SEGUSE;" is a single statement and is safe inside if/else.
 */
#define UPDATE_SEGUSE do { \
	if (lastseg != -1) { \
		LFS_SEGENTRY(sup, fs, lastseg, sup_bp); \
		if (num > sup->su_nbytes) { \
			printf("lfs_truncate: negative bytes: segment %d short by %d\n", \
			       lastseg, num - sup->su_nbytes); \
			panic("lfs_truncate: negative bytes"); \
			sup->su_nbytes = 0; \
		} else { \
			sup->su_nbytes -= num; \
		} \
		e1 = VOP_BWRITE(sup_bp); \
		fragsreleased += numfrags(fs, num); \
	} \
} while (0)
    200 
/*
 * Account for S bytes freed at disk address `daddr'.
 *
 * A zero daddr is ignored.  Bytes belonging to the same segment as the
 * previous call are accumulated in `num'; when the segment changes, the
 * pending total is flushed with UPDATE_SEGUSE and accumulation restarts
 * for the new segment.
 *
 * Operates on the caller's locals: fs, daddr, seg, lastseg and num.
 * Wrapped in do { } while (0) so that "SEGDEC(x);" is one statement.
 */
#define SEGDEC(S) do { \
	if (daddr != 0) { \
		if (lastseg != (seg = datosn(fs, daddr))) { \
			UPDATE_SEGUSE; \
			num = (S); \
			lastseg = seg; \
		} else \
			num += (S); \
	} \
} while (0)
    211 
    212 /*
    213  * Truncate the inode ip to at most length size.  Update segment usage
    214  * table information.
    215  */
    216 /* ARGSUSED */
    217 int
    218 lfs_truncate(v)
    219 	void *v;
    220 {
    221 	struct vop_truncate_args /* {
    222 		struct vnode *a_vp;
    223 		off_t a_length;
    224 		int a_flags;
    225 		struct ucred *a_cred;
    226 		struct proc *a_p;
    227 	} */ *ap = v;
    228 	struct indir *inp;
    229 	int i;
    230         int error, aflags;
    231 	ufs_daddr_t *daddrp;
    232 	struct vnode *vp = ap->a_vp;
    233 	off_t length = ap->a_length;
    234 	struct buf *bp, *sup_bp;
    235 	struct ifile *ifp;
    236 	struct inode *ip;
    237 	struct lfs *fs;
    238 	struct indir a[NIADDR + 2], a_end[NIADDR + 2];
    239 	SEGUSE *sup;
    240 	ufs_daddr_t daddr, lastblock, lbn, olastblock;
    241 	ufs_daddr_t oldsize_lastblock, oldsize_newlast, newsize;
    242 	long off, a_released, fragsreleased, i_released;
    243 	int e1, e2, depth, lastseg, num, offset, seg, freesize, s;
    244 
    245 	if (length < 0)
    246 		return (EINVAL);
    247 
    248 	ip = VTOI(vp);
    249 	if (length == ip->i_ffs_size) /* XXX don't update times */
    250 		return 0;
    251 
    252 	if (vp->v_type == VLNK &&
    253 	   (ip->i_ffs_size < vp->v_mount->mnt_maxsymlinklen ||
    254 	     (vp->v_mount->mnt_maxsymlinklen == 0 &&
    255 	      ip->i_din.ffs_din.di_blocks == 0))) {
    256 #ifdef DIAGNOSTIC
    257 		if (length != 0)
    258 			panic("lfs_truncate: partial truncate of symlink");
    259 #endif
    260 		bzero((char *)&ip->i_ffs_shortlink, (u_int)ip->i_ffs_size);
    261 		ip->i_ffs_size = 0;
    262 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    263 		return (VOP_UPDATE(vp, NULL, NULL, 0));
    264 	}
    265 
    266 	fs = ip->i_lfs;
    267 	lfs_imtime(fs);
    268 
    269 	/* If length is larger than the file, just update the times. */
    270 	if (ip->i_ffs_size < length) {
    271 		if (length > fs->lfs_maxfilesize)
    272 			return (EFBIG);
    273 		/*
    274 		 * Allocate the new last block to ensure that any previously
    275 		 * existing fragments get extended.  (XXX Adding the new
    276 		 * block is not really necessary.)
    277 		 */
    278 		error = VOP_BALLOC(vp, length - 1, 1, ap->a_cred, aflags, &bp);
    279 		if (error)
    280  			return (error);
    281 		VOP_BWRITE(bp);
    282 		ip->i_ffs_size = length;
    283 		uvm_vnp_setsize(vp, length);
    284 		(void) uvm_vnp_uncache(vp);
    285 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    286 		return (VOP_UPDATE(vp, NULL, NULL, 0));
    287 	}
    288 	uvm_vnp_setsize(vp, length);
    289 
    290 	/*
    291 	 * Make sure no writes happen while we're truncating.
    292 	 * Otherwise, blocks which are accounted for on the inode
    293 	 * *and* which have been created for cleaning can coexist,
    294 	 * and cause us to overcount, and panic below.
    295 	 *
    296 	 * XXX KS - too restrictive?  Maybe only when cleaning?
    297 	 */
    298 	while(fs->lfs_seglock && fs->lfs_lockpid != ap->a_p->p_pid) {
    299 		tsleep(&fs->lfs_seglock, (PRIBIO+1), "lfs_truncate", 0);
    300 	}
    301 
    302 	/*
    303 	 * Calculate index into inode's block list of last direct and indirect
    304 	 * blocks (if any) which we want to keep.  Lastblock is 0 when the
    305 	 * file is truncated to 0.
    306 	 */
    307 	lastblock = lblkno(fs, length + fs->lfs_bsize - 1);
    308 	olastblock = lblkno(fs, ip->i_ffs_size + fs->lfs_bsize - 1) - 1;
    309 
    310 	/*
    311 	 * Update the size of the file. If the file is not being truncated to
    312 	 * a block boundry, the contents of the partial block following the end
    313 	 * must be zero'd in case it ever becomes accessible again
    314 	 * because of subsequent file growth.  For this part of the code,
    315 	 * oldsize_newlast refers to the old size of the new last block in the
    316 	 * file.
    317 	 */
    318 	offset = blkoff(fs, length);
    319 	lbn = lblkno(fs, length);
    320 	oldsize_newlast = blksize(fs, ip, lbn);
    321 
    322 	/* Now set oldsize to the current size of the current last block */
    323 	oldsize_lastblock = blksize(fs, ip, olastblock);
    324 	if (offset == 0)
    325 		ip->i_ffs_size = length;
    326 	else {
    327 #ifdef QUOTA
    328 		if ((e1 = getinoquota(ip)) != 0)
    329 			return (e1);
    330 #endif
    331 		if ((e1 = bread(vp, lbn, oldsize_newlast, NOCRED, &bp)) != 0) {
    332 			printf("lfs_truncate: bread: %d\n",e1);
    333 			brelse(bp);
    334 			return (e1);
    335 		}
    336 		ip->i_ffs_size = length;
    337 		(void)uvm_vnp_uncache(vp);
    338 		newsize = blksize(fs, ip, lbn);
    339 		bzero((char *)bp->b_data + offset, (u_int)(newsize - offset));
    340 #ifdef DEBUG
    341 		if(bp->b_flags & B_CALL)
    342 		    panic("Can't allocbuf malloced buffer!");
    343 		else
    344 #endif
    345 			allocbuf(bp, newsize);
    346 		if(oldsize_newlast > newsize)
    347 			ip->i_ffs_blocks -= btodb(oldsize_newlast - newsize);
    348 		if ((e1 = VOP_BWRITE(bp)) != 0) {
    349 			printf("lfs_truncate: bwrite: %d\n",e1);
    350 			return (e1);
    351 		}
    352 	}
    353 	/*
    354 	 * Modify sup->su_nbyte counters for each deleted block; keep track
    355 	 * of number of blocks removed for ip->i_ffs_blocks.
    356 	 */
    357 	fragsreleased = 0;
    358 	num = 0;
    359 	lastseg = -1;
    360 
    361 	for (lbn = olastblock; lbn >= lastblock;) {
    362 		/* XXX use run length from bmap array to make this faster */
    363 		ufs_bmaparray(vp, lbn, &daddr, a, &depth, NULL);
    364 		if (lbn == olastblock) {
    365 			for (i = NIADDR + 2; i--;)
    366 				a_end[i] = a[i];
    367 			freesize = oldsize_lastblock;
    368 		} else
    369 			freesize = fs->lfs_bsize;
    370 		switch (depth) {
    371 		case 0:		/* Direct block. */
    372 			daddr = ip->i_ffs_db[lbn];
    373 			SEGDEC(freesize);
    374 			ip->i_ffs_db[lbn] = 0;
    375 			--lbn;
    376 			break;
    377 #ifdef DIAGNOSTIC
    378 		case 1:		/* An indirect block. */
    379 			panic("lfs_truncate: ufs_bmaparray returned depth 1");
    380 			/* NOTREACHED */
    381 #endif
    382 		default:	/* Chain of indirect blocks. */
    383 			inp = a + --depth;
    384 			if (inp->in_off > 0 && lbn != lastblock) {
    385 				lbn -= inp->in_off < lbn - lastblock ?
    386 					inp->in_off : lbn - lastblock;
    387 				break;
    388 			}
    389 			for (; depth && (inp->in_off == 0 || lbn == lastblock);
    390 			     --inp, --depth) {
    391 				if (bread(vp,
    392 					  inp->in_lbn, fs->lfs_bsize, NOCRED, &bp))
    393 					panic("lfs_truncate: bread bno %d",
    394 					      inp->in_lbn);
    395 				daddrp = (ufs_daddr_t *)bp->b_data + inp->in_off;
    396 				for (i = inp->in_off;
    397 				     i++ <= a_end[depth].in_off;) {
    398 					daddr = *daddrp++;
    399 					SEGDEC(freesize);
    400 				}
    401 				a_end[depth].in_off = NINDIR(fs) - 1;
    402 				if (inp->in_off == 0)
    403 					brelse (bp);
    404 				else {
    405 					bzero((ufs_daddr_t *)bp->b_data +
    406 					      inp->in_off, fs->lfs_bsize -
    407 					      inp->in_off * sizeof(ufs_daddr_t));
    408 					if ((e1 = VOP_BWRITE(bp)) != 0) {
    409 						printf("lfs_truncate: indir bwrite: %d\n",e1);
    410 						return (e1);
    411 					}
    412 				}
    413 			}
    414 			if (depth == 0 && a[1].in_off == 0) {
    415 				off = a[0].in_off;
    416 				daddr = ip->i_ffs_ib[off];
    417 				SEGDEC(freesize);
    418 				ip->i_ffs_ib[off] = 0;
    419 			}
    420 			if (lbn == lastblock || lbn <= NDADDR)
    421 				--lbn;
    422 			else {
    423 				lbn -= NINDIR(fs);
    424 				if (lbn < lastblock)
    425 					lbn = lastblock;
    426 			}
    427 		}
    428 	}
    429 	UPDATE_SEGUSE;
    430 
    431 	/* If truncating the file to 0, update the version number. */
    432 	if (length == 0) {
    433 		LFS_IENTRY(ifp, fs, ip->i_number, bp);
    434 		++ifp->if_version;
    435 		(void) VOP_BWRITE(bp);
    436 	}
    437 #ifdef DIAGNOSTIC
    438 	if (ip->i_ffs_blocks < fragstodb(fs, fragsreleased)) {
    439 		panic("lfs_truncate: frag count < 0 (%d<%ld), ino %d\n",
    440 			    ip->i_ffs_blocks, fragstodb(fs, fragsreleased),
    441 			    ip->i_number);
    442 		fragsreleased = dbtofrags(fs, ip->i_ffs_blocks);
    443 	}
    444 #endif
    445 	ip->i_ffs_blocks -= fragstodb(fs, fragsreleased);
    446 	fs->lfs_bfree +=  fragstodb(fs, fragsreleased);
    447 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
    448 	/*
    449 	 * Traverse dirty block list counting number of dirty buffers
    450 	 * that are being deleted out of the cache, so that the lfs_avail
    451 	 * field can be updated.
    452 	 */
    453 	a_released = 0;
    454 	i_released = 0;
    455 
    456 	s = splbio();
    457 	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
    458 
    459 		/* XXX KS - Don't miscount if we're not truncating to zero. */
    460 		if(length>0 && !(bp->b_lblkno >= 0 && bp->b_lblkno > lastblock)
    461 		   && !(bp->b_lblkno < 0 && bp->b_lblkno < -lastblock-NIADDR))
    462 			continue;
    463 
    464 		if (bp->b_flags & B_LOCKED) {
    465 			a_released += numfrags(fs, bp->b_bcount);
    466 			/*
    467 			 * XXX
    468 			 * When buffers are created in the cache, their block
    469 			 * number is set equal to their logical block number.
    470 			 * If that is still true, we are assuming that the
    471 			 * blocks are new (not yet on disk) and weren't
    472 			 * counted above.  However, there is a slight chance
    473 			 * that a block's disk address is equal to its logical
    474 			 * block number in which case, we'll get an overcounting
    475 			 * here.
    476 			 */
    477 			if (bp->b_blkno == bp->b_lblkno) {
    478 				i_released += numfrags(fs, bp->b_bcount);
    479 			}
    480 		}
    481 	}
    482 	splx(s);
    483 	fragsreleased = i_released;
    484 #ifdef DIAGNOSTIC
    485 	if (fragsreleased > dbtofrags(fs, ip->i_ffs_blocks)) {
    486 		printf("lfs_inode: %ld frags released > %d in inode %d\n",
    487 		       fragsreleased, dbtofrags(fs, ip->i_ffs_blocks),
    488 		       ip->i_number);
    489 		fragsreleased = dbtofrags(fs, ip->i_ffs_blocks);
    490 	}
    491 #endif
    492 	fs->lfs_bfree += fragstodb(fs, fragsreleased);
    493 	ip->i_ffs_blocks -= fragstodb(fs, fragsreleased);
    494 #ifdef DIAGNOSTIC
    495 	if (length == 0 && ip->i_ffs_blocks != 0) {
    496 		printf("lfs_inode: trunc to zero, but %d blocks left on inode %d\n",
    497 		       ip->i_ffs_blocks, ip->i_number);
    498 		panic("lfs_inode\n");
    499 	}
    500 #endif
    501 	fs->lfs_avail += fragstodb(fs, a_released);
    502 	if(length>0)
    503 		e1 = lfs_vinvalbuf(vp, ap->a_cred, ap->a_p, lastblock-1);
    504 	else
    505 		e1 = vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 0, 0);
    506 	e2 = VOP_UPDATE(vp, NULL, NULL, 0);
    507 	if(e1)
    508 		printf("lfs_truncate: vinvalbuf: %d\n",e1);
    509 	if(e2)
    510 		printf("lfs_truncate: update: %d\n",e2);
    511 
    512 	return (e1 ? e1 : e2 ? e2 : 0);
    513 }
    514 
    515 /*
    516  * Get rid of blocks a la vinvalbuf; but only blocks that are of a higher
    517  * lblkno than the file size allows.
    518  */
    519 int
    520 lfs_vinvalbuf(vp, cred, p, maxblk)
    521 	struct vnode *vp;
    522 	struct ucred *cred;
    523 	struct proc *p;
    524 	ufs_daddr_t maxblk;
    525 {
    526 	struct buf *bp;
    527 	struct buf *nbp, *blist;
    528 	int i, s, error, dirty;
    529 
    530       top:
    531 	dirty=0;
    532 	for (i=0;i<2;i++) {
    533 		if(i==0)
    534 			blist = vp->v_cleanblkhd.lh_first;
    535 		else /* i == 1 */
    536 			blist = vp->v_dirtyblkhd.lh_first;
    537 
    538 		s = splbio();
    539 		for (bp = blist; bp; bp = nbp) {
    540 			nbp = bp->b_vnbufs.le_next;
    541 
    542 			if (bp->b_flags & B_GATHERED) {
    543 				error = tsleep(vp, PRIBIO+1, "lfs_vin2", 0);
    544 				splx(s);
    545 				if(error)
    546 					return error;
    547 				goto top;
    548 			}
    549 			if (bp->b_flags & B_BUSY) {
    550 				bp->b_flags |= B_WANTED;
    551 				error = tsleep((caddr_t)bp,
    552 					(PRIBIO + 1), "lfs_vinval", 0);
    553 				if (error) {
    554 					splx(s);
    555 					return (error);
    556 				}
    557 				goto top;
    558 			}
    559 
    560 			bp->b_flags |= B_BUSY;
    561 			if((bp->b_lblkno >= 0 && bp->b_lblkno > maxblk)
    562 			   || (bp->b_lblkno < 0 && bp->b_lblkno < -maxblk-(NIADDR-1)))
    563 			{
    564 				bp->b_flags |= B_INVAL | B_VFLUSH;
    565 				if(bp->b_flags & B_CALL) {
    566 					lfs_freebuf(bp);
    567 				} else {
    568 					brelse(bp);
    569 				}
    570 				++dirty;
    571 			} else {
    572 				/*
    573 				 * This buffer is still on its free list.
    574 				 * So don't brelse, but wake up any sleepers.
    575 				 */
    576 				bp->b_flags &= ~B_BUSY;
    577 				if(bp->b_flags & B_WANTED) {
    578 					bp->b_flags &= ~(B_WANTED|B_AGE);
    579 					wakeup(bp);
    580 				}
    581 			}
    582 		}
    583 		splx(s);
    584 	}
    585 	if(dirty)
    586 		goto top;
    587 	return (0);
    588 }
    589