Home | History | Annotate | Line # | Download | only in lfs
lfs_syscalls.c revision 1.15
      1 /*	$NetBSD: lfs_syscalls.c,v 1.15 1998/02/19 00:54:39 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)lfs_syscalls.c	8.6 (Berkeley) 6/16/94
     36  */
     37 
     38 #include "fs_lfs.h"		/* for prototypes in syscallargs.h */
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/proc.h>
     43 #include <sys/buf.h>
     44 #include <sys/mount.h>
     45 #include <sys/vnode.h>
     46 #include <sys/malloc.h>
     47 #include <sys/kernel.h>
     48 
     49 #include <sys/syscallargs.h>
     50 
     51 #include <ufs/ufs/quota.h>
     52 #include <ufs/ufs/inode.h>
     53 #include <ufs/ufs/ufsmount.h>
     54 #include <ufs/ufs/ufs_extern.h>
     55 
     56 #include <ufs/lfs/lfs.h>
     57 #include <ufs/lfs/lfs_extern.h>
     58 
/*
 * Advance the segment's FINFO pointer to the address just past the
 * fi_nblocks entries of the current FINFO's fi_blocks array.
 */
#define BUMP_FIP(SP) \
	((SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks]))

/* Increment/decrement the FINFO count stored in the segment summary. */
#define INC_FINFO(SP) (++((SEGSUM *)((SP)->segsum))->ss_nfinfo)
#define DEC_FINFO(SP) (--((SEGSUM *)((SP)->segsum))->ss_nfinfo)

/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  S is the bytes added to the summary.
 *
 * The macro uses the variables `sp' and `fs' of the calling scope.  It is
 * wrapped in do/while(0) so that it behaves as a single statement and can
 * be used safely in an unbraced if/else.
 */
#define	CHECK_SEG(s)				\
do {						\
	if (sp->sum_bytes_left < (s)) {		\
		(void) lfs_writeseg(fs, sp);	\
	}					\
} while (0)
     73 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
     74 
     75 /*
     76  * lfs_markv:
     77  *
     78  * This will mark inodes and blocks dirty, so they are written into the log.
     79  * It will block until all the blocks have been written.  The segment create
     80  * time passed in the block_info and inode_info structures is used to decide
     81  * if the data is valid for each block (in case some process dirtied a block
     82  * or inode that is being cleaned between the determination that a block is
     83  * live and the lfs_markv call).
     84  *
     85  *  0 on success
     86  * -1/errno is return on error.
     87  */
     88 int
     89 lfs_markv(p, v, retval)
     90 	struct proc *p;
     91 	void *v;
     92 	register_t *retval;
     93 {
     94 	struct lfs_markv_args /* {
     95 		syscallarg(fsid_t *) fsidp;
     96 		syscallarg(struct block_info *) blkiov;
     97 		syscallarg(int) blkcnt;
     98 	} */ *uap = v;
     99 	struct segment *sp;
    100 	BLOCK_INFO *blkp;
    101 	IFILE *ifp;
    102 	struct buf *bp, **bpp;
    103 	struct inode *ip = NULL;
    104 	struct lfs *fs;
    105 	struct mount *mntp;
    106 	struct vnode *vp;
    107 	fsid_t fsid;
    108 	void *start;
    109 	ino_t lastino;
    110 	daddr_t b_daddr, v_daddr;
    111 	u_long bsize;
    112 	int cnt, error;
    113 
    114 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    115 		return (error);
    116 
    117 	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
    118 		return (error);
    119 	if ((mntp = getvfs(&fsid)) == NULL)
    120 		return (EINVAL);
    121 
    122 	cnt = SCARG(uap, blkcnt);
    123 	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    124 	error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO));
    125 	if (error)
    126 		goto err1;
    127 
    128 	/* Mark blocks/inodes dirty.  */
    129 	fs = VFSTOUFS(mntp)->um_lfs;
    130 	bsize = fs->lfs_bsize;
    131 	error = 0;
    132 
    133 	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
    134 	sp = fs->lfs_sp;
    135 	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
    136 	    blkp = start; cnt--; ++blkp) {
    137 		/*
    138 		 * Get the IFILE entry (only once) and see if the file still
    139 		 * exists.
    140 		 */
    141 		if (lastino != blkp->bi_inode) {
    142 			if (lastino != LFS_UNUSED_INUM) {
    143 				/* Finish up last file */
    144 				if (sp->fip->fi_nblocks == 0) {
    145 					DEC_FINFO(sp);
    146 					sp->sum_bytes_left +=
    147 					    sizeof(FINFO) - sizeof(daddr_t);
    148 				} else {
    149 					lfs_updatemeta(sp);
    150 					BUMP_FIP(sp);
    151 				}
    152 
    153 				lfs_writeinode(fs, sp, ip);
    154 				lfs_vunref(vp);
    155 			}
    156 
    157 			/* Start a new file */
    158 			CHECK_SEG(sizeof(FINFO));
    159 			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
    160 			INC_FINFO(sp);
    161 			sp->start_lbp = &sp->fip->fi_blocks[0];
    162 			sp->vp = NULL;
    163 			sp->fip->fi_version = blkp->bi_version;
    164 			sp->fip->fi_nblocks = 0;
    165 			sp->fip->fi_ino = blkp->bi_inode;
    166 			lastino = blkp->bi_inode;
    167 			if (blkp->bi_inode == LFS_IFILE_INUM)
    168 				v_daddr = fs->lfs_idaddr;
    169 			else {
    170 				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
    171 				v_daddr = ifp->if_daddr;
    172 				brelse(bp);
    173 			}
    174 			if (v_daddr == LFS_UNUSED_DADDR)
    175 				continue;
    176 
    177 			/* Get the vnode/inode. */
    178 			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
    179 			    blkp->bi_lbn == LFS_UNUSED_LBN ?
    180 			    blkp->bi_bp : NULL)) {
    181 #ifdef DIAGNOSTIC
    182 				printf("lfs_markv: VFS_VGET failed (%d)\n",
    183 				    blkp->bi_inode);
    184 #endif
    185 				lastino = LFS_UNUSED_INUM;
    186 				v_daddr = LFS_UNUSED_DADDR;
    187 				continue;
    188 			}
    189 			sp->vp = vp;
    190 			ip = VTOI(vp);
    191 		} else if (v_daddr == LFS_UNUSED_DADDR)
    192 			continue;
    193 
    194 		/* If this BLOCK_INFO didn't contain a block, keep going. */
    195 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    196 			continue;
    197 		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
    198 		    b_daddr != blkp->bi_daddr)
    199 			continue;
    200 		/*
    201 		 * If we got to here, then we are keeping the block.  If it
    202 		 * is an indirect block, we want to actually put it in the
    203 		 * buffer cache so that it can be updated in the finish_meta
    204 		 * section.  If it's not, we need to allocate a fake buffer
    205 		 * so that writeseg can perform the copyin and write the buffer.
    206 		 */
    207 		if (blkp->bi_lbn >= 0)	/* Data Block */
    208 			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
    209 			    blkp->bi_bp);
    210 		else {
    211 			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
    212 			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
    213 			    (error = copyin(blkp->bi_bp, bp->b_data,
    214 			    bsize)))
    215 				goto err2;
    216 			if ((error = VOP_BWRITE(bp)) != 0)
    217 				goto err2;
    218 		}
    219 		while (lfs_gatherblock(sp, bp, NULL));
    220 	}
    221 	if (sp->vp) {
    222 		if (sp->fip->fi_nblocks == 0) {
    223 			DEC_FINFO(sp);
    224 			sp->sum_bytes_left +=
    225 			    sizeof(FINFO) - sizeof(daddr_t);
    226 		} else
    227 			lfs_updatemeta(sp);
    228 
    229 		lfs_writeinode(fs, sp, ip);
    230 		lfs_vunref(vp);
    231 	}
    232 	(void) lfs_writeseg(fs, sp);
    233 	lfs_segunlock(fs);
    234 	free(start, M_SEGMENT);
    235 	return (error);
    236 
    237 /*
    238  * XXX
    239  * If we come in to error 2, we might have indirect blocks that were
    240  * updated and now have bad block pointers.  I don't know what to do
    241  * about this.
    242  */
    243 
    244 err2:	lfs_vunref(vp);
    245 	/* Free up fakebuffers */
    246 	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
    247 		if ((*bpp)->b_flags & B_CALL) {
    248 			brelvp(*bpp);
    249 			free(*bpp, M_SEGMENT);
    250 		} else
    251 			brelse(*bpp);
    252 	lfs_segunlock(fs);
    253 err1:
    254 	free(start, M_SEGMENT);
    255 	return (error);
    256 }
    257 
    258 /*
    259  * lfs_bmapv:
    260  *
    261  * This will fill in the current disk address for arrays of blocks.
    262  *
    263  *  0 on success
    264  * -1/errno is return on error.
    265  */
    266 int
    267 lfs_bmapv(p, v, retval)
    268 	struct proc *p;
    269 	void *v;
    270 	register_t *retval;
    271 {
    272 	struct lfs_bmapv_args /* {
    273 		syscallarg(fsid_t *) fsidp;
    274 		syscallarg(struct block_info *) blkiov;
    275 		syscallarg(int) blkcnt;
    276 	} */ *uap = v;
    277 	BLOCK_INFO *blkp;
    278 	struct mount *mntp;
    279 	struct vnode *vp;
    280 	fsid_t fsid;
    281 	void *start;
    282 	daddr_t daddr;
    283 	int cnt, error, step;
    284 
    285 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    286 		return (error);
    287 
    288 	error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t));
    289 	if (error)
    290 		return (error);
    291 	if ((mntp = getvfs(&fsid)) == NULL)
    292 		return (EINVAL);
    293 
    294 	cnt = SCARG(uap, blkcnt);
    295 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    296 	error = copyin(SCARG(uap, blkiov), blkp, cnt * sizeof(BLOCK_INFO));
    297 	if (error) {
    298 		free(blkp, M_SEGMENT);
    299 		return (error);
    300 	}
    301 
    302 	for (step = cnt; step--; ++blkp) {
    303 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    304 			continue;
    305 		/* Could be a deadlock ? */
    306 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
    307 			daddr = LFS_UNUSED_DADDR;
    308 		else {
    309 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
    310 				daddr = LFS_UNUSED_DADDR;
    311 			vput(vp);
    312 		}
    313 		blkp->bi_daddr = daddr;
    314         }
    315 	copyout(start, SCARG(uap, blkiov), cnt * sizeof(BLOCK_INFO));
    316 	free(start, M_SEGMENT);
    317 	return (0);
    318 }
    319 
    320 /*
    321  * lfs_segclean:
    322  *
    323  * Mark the segment clean.
    324  *
    325  *  0 on success
    326  * -1/errno is return on error.
    327  */
    328 int
    329 lfs_segclean(p, v, retval)
    330 	struct proc *p;
    331 	void *v;
    332 	register_t *retval;
    333 {
    334 	struct lfs_segclean_args /* {
    335 		syscallarg(fsid_t *) fsidp;
    336 		syscallarg(u_long) segment;
    337 	} */ *uap = v;
    338 	CLEANERINFO *cip;
    339 	SEGUSE *sup;
    340 	struct buf *bp;
    341 	struct mount *mntp;
    342 	struct lfs *fs;
    343 	fsid_t fsid;
    344 	int error;
    345 
    346 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    347 		return (error);
    348 
    349 	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
    350 		return (error);
    351 	if ((mntp = getvfs(&fsid)) == NULL)
    352 		return (EINVAL);
    353 
    354 	fs = VFSTOUFS(mntp)->um_lfs;
    355 
    356 	if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
    357 		return (EBUSY);
    358 
    359 	LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
    360 	if (sup->su_flags & SEGUSE_ACTIVE) {
    361 		brelse(bp);
    362 		return (EBUSY);
    363 	}
    364 	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
    365 	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
    366 	    sup->su_ninos * btodb(fs->lfs_bsize);
    367 	sup->su_flags &= ~SEGUSE_DIRTY;
    368 	(void) VOP_BWRITE(bp);
    369 
    370 	LFS_CLEANERINFO(cip, fs, bp);
    371 	++cip->clean;
    372 	--cip->dirty;
    373 	(void) VOP_BWRITE(bp);
    374 	wakeup(&fs->lfs_avail);
    375 	return (0);
    376 }
    377 
    378 /*
    379  * lfs_segwait:
    380  *
    381  * This will block until a segment in file system fsid is written.  A timeout
    382  * in milliseconds may be specified which will awake the cleaner automatically.
    383  * An fsid of -1 means any file system, and a timeout of 0 means forever.
    384  *
    385  *  0 on success
    386  *  1 on timeout
    387  * -1/errno is return on error.
    388  */
    389 int
    390 lfs_segwait(p, v, retval)
    391 	struct proc *p;
    392 	void *v;
    393 	register_t *retval;
    394 {
    395 	struct lfs_segwait_args /* {
    396 		syscallarg(fsid_t *) fsidp;
    397 		syscallarg(struct timeval *) tv;
    398 	} */ *uap = v;
    399 	extern int lfs_allclean_wakeup;
    400 	struct mount *mntp;
    401 	struct timeval atv;
    402 	fsid_t fsid;
    403 	void *addr;
    404 	u_long timeout;
    405 	int error, s;
    406 
    407 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) {
    408 		return (error);
    409 }
    410 #ifdef WHEN_QUADS_WORK
    411 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    412 		return (error);
    413 	if (fsid == (fsid_t)-1)
    414 		addr = &lfs_allclean_wakeup;
    415 	else {
    416 		if ((mntp = getvfs(&fsid)) == NULL)
    417 			return (EINVAL);
    418 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    419 	}
    420 #else
    421 	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
    422 		return (error);
    423 	if ((mntp = getvfs(&fsid)) == NULL)
    424 		addr = &lfs_allclean_wakeup;
    425 	else
    426 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    427 #endif
    428 
    429 	if (SCARG(uap, tv)) {
    430 		error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
    431 		if (error)
    432 			return (error);
    433 		if (itimerfix(&atv))
    434 			return (EINVAL);
    435 		s = splclock();
    436 		timeradd(&atv, &time, &atv);
    437 		timeout = hzto(&atv);
    438 		splx(s);
    439 	} else
    440 		timeout = 0;
    441 
    442 	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
    443 	return (error == ERESTART ? EINTR : 0);
    444 }
    445 
    446 /*
    447  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
    448  * daddr from the ifile, so don't look it up again.  If the cleaner is
    449  * processing IINFO structures, it may have the ondisk inode already, so
    450  * don't go retrieving it again.
    451  */
    452 int
    453 lfs_fastvget(mp, ino, daddr, vpp, dinp)
    454 	struct mount *mp;
    455 	ino_t ino;
    456 	daddr_t daddr;
    457 	struct vnode **vpp;
    458 	struct dinode *dinp;
    459 {
    460 	register struct inode *ip;
    461 	struct vnode *vp;
    462 	struct ufsmount *ump;
    463 	struct buf *bp;
    464 	dev_t dev;
    465 	int error;
    466 
    467 	ump = VFSTOUFS(mp);
    468 	dev = ump->um_dev;
    469 	/*
    470 	 * This is playing fast and loose.  Someone may have the inode
    471 	 * locked, in which case they are going to be distinctly unhappy
    472 	 * if we trash something.
    473 	 */
    474 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
    475 		lfs_vref(*vpp);
    476 		if ((*vpp)->v_flag & VXLOCK)
    477 			printf ("Cleaned vnode VXLOCKED\n");
    478 		ip = VTOI(*vpp);
    479 		if (ip->i_flag & IN_LOCKED)
    480 			printf("cleaned vnode locked\n");
    481 		if (!(ip->i_flag & IN_MODIFIED)) {
    482 			++ump->um_lfs->lfs_uinodes;
    483 			ip->i_flag |= IN_MODIFIED;
    484 		}
    485 		ip->i_flag |= IN_MODIFIED;
    486 		return (0);
    487 	}
    488 
    489 	/* Allocate new vnode/inode. */
    490 	if ((error = lfs_vcreate(mp, ino, &vp)) != 0) {
    491 		*vpp = NULL;
    492 		return (error);
    493 	}
    494 
    495 	/*
    496 	 * Put it onto its hash chain and lock it so that other requests for
    497 	 * this inode will block if they arrive while we are sleeping waiting
    498 	 * for old data structures to be purged or for the contents of the
    499 	 * disk portion of this inode to be read.
    500 	 */
    501 	ip = VTOI(vp);
    502 	ufs_ihashins(ip);
    503 
    504 	/*
    505 	 * XXX
    506 	 * This may not need to be here, logically it should go down with
    507 	 * the i_devvp initialization.
    508 	 * Ask Kirk.
    509 	 */
    510 	ip->i_lfs = ump->um_lfs;
    511 
    512 	/* Read in the disk contents for the inode, copy into the inode. */
    513 	if (dinp) {
    514 		error = copyin(dinp, &ip->i_din.ffs_din, sizeof(struct dinode));
    515 		if (error)
    516 			return (error);
    517 	}
    518 	else {
    519 		error = bread(ump->um_devvp, daddr,
    520 			      (int)ump->um_lfs->lfs_bsize, NOCRED, &bp);
    521 		if (error) {
    522 			/*
    523 			 * The inode does not contain anything useful, so it
    524 			 * would be misleading to leave it on its hash chain.
    525 			 * Iput() will return it to the free list.
    526 			 */
    527 			ufs_ihashrem(ip);
    528 
    529 			/* Unlock and discard unneeded inode. */
    530 			lfs_vunref(vp);
    531 			brelse(bp);
    532 			*vpp = NULL;
    533 			return (error);
    534 		}
    535 		ip->i_din.ffs_din =
    536 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
    537 		brelse(bp);
    538 	}
    539 
    540 	/* Inode was just read from user space or disk, make sure it's locked */
    541 	ip->i_flag |= IN_LOCKED;
    542 
    543 	/*
    544 	 * Initialize the vnode from the inode, check for aliases.  In all
    545 	 * cases re-init ip, the underlying vnode/inode may have changed.
    546 	 */
    547 	error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp);
    548 	if (error) {
    549 		lfs_vunref(vp);
    550 		*vpp = NULL;
    551 		return (error);
    552 	}
    553 	/*
    554 	 * Finish inode initialization now that aliasing has been resolved.
    555 	 */
    556 	ip->i_devvp = ump->um_devvp;
    557 	ip->i_flag |= IN_MODIFIED;
    558 	++ump->um_lfs->lfs_uinodes;
    559 	VREF(ip->i_devvp);
    560 	*vpp = vp;
    561 	return (0);
    562 }
    563 struct buf *
    564 lfs_fakebuf(vp, lbn, size, uaddr)
    565 	struct vnode *vp;
    566 	int lbn;
    567 	size_t size;
    568 	caddr_t uaddr;
    569 {
    570 	struct buf *bp;
    571 
    572 	bp = lfs_newbuf(vp, lbn, 0);
    573 	bp->b_saveaddr = uaddr;
    574 	bp->b_bufsize = size;
    575 	bp->b_bcount = size;
    576 	bp->b_flags |= B_INVAL;
    577 	return (bp);
    578 }
    579