Home | History | Annotate | Line # | Download | only in lfs
lfs_syscalls.c revision 1.9
      1 /*	$NetBSD: lfs_syscalls.c,v 1.9 1995/09/21 23:39:20 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)lfs_syscalls.c	8.6 (Berkeley) 6/16/94
     36  */
     37 
     38 #include <sys/param.h>
     39 #include <sys/systm.h>
     40 #include <sys/proc.h>
     41 #include <sys/buf.h>
     42 #include <sys/mount.h>
     43 #include <sys/vnode.h>
     44 #include <sys/malloc.h>
     45 #include <sys/kernel.h>
     46 
     47 #include <sys/syscallargs.h>
     48 
     49 #include <ufs/ufs/quota.h>
     50 #include <ufs/ufs/inode.h>
     51 #include <ufs/ufs/ufsmount.h>
     52 #include <ufs/ufs/ufs_extern.h>
     53 
     54 #include <ufs/lfs/lfs.h>
     55 #include <ufs/lfs/lfs_extern.h>
/*
 * Advance the segment's current FINFO pointer to the address just past the
 * fi_nblocks entries of fi_blocks[] in the FINFO it points at now.
 * The expansion is fully parenthesized so it can be used as a sub-expression.
 */
#define BUMP_FIP(SP) \
	((SP)->fip = (FINFO *)(&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks]))

/*
 * Increment / decrement the ss_nfinfo counter of the segment summary that
 * (SP)->segsum points at.  Both evaluate to the updated counter value.
 */
#define INC_FINFO(SP)	(++((SEGSUM *)((SP)->segsum))->ss_nfinfo)
#define DEC_FINFO(SP)	(--((SEGSUM *)((SP)->segsum))->ss_nfinfo)
     61 
/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  s is the number of bytes about to be added to the
 * summary; when sp->sum_bytes_left is smaller than that, lfs_writeseg(fs, sp)
 * is called first.  The macro expects `fs' and `sp' to be in scope at the
 * point of use.
 *
 * The body is wrapped in do { } while (0) so that "CHECK_SEG(x);" is exactly
 * one statement, including when it is the body of an unbraced if/else.
 */
#define	CHECK_SEG(s)				\
do {						\
	if (sp->sum_bytes_left < (s)) {		\
		(void) lfs_writeseg(fs, sp);	\
	}					\
} while (0)
     70 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
     71 
     72 /*
     73  * lfs_markv:
     74  *
     75  * This will mark inodes and blocks dirty, so they are written into the log.
     76  * It will block until all the blocks have been written.  The segment create
     77  * time passed in the block_info and inode_info structures is used to decide
     78  * if the data is valid for each block (in case some process dirtied a block
     79  * or inode that is being cleaned between the determination that a block is
     80  * live and the lfs_markv call).
     81  *
     82  *  0 on success
     83  * -1/errno is return on error.
     84  */
     85 int
     86 lfs_markv(p, v, retval)
     87 	struct proc *p;
     88 	void *v;
     89 	register_t *retval;
     90 {
     91 	struct lfs_markv_args /* {
     92 		syscallarg(fsid_t *) fsidp;
     93 		syscallarg(struct block_info *) blkiov;
     94 		syscallarg(int) blkcnt;
     95 	} */ *uap = v;
     96 	struct segment *sp;
     97 	BLOCK_INFO *blkp;
     98 	IFILE *ifp;
     99 	struct buf *bp, **bpp;
    100 	struct inode *ip;
    101 	struct lfs *fs;
    102 	struct mount *mntp;
    103 	struct vnode *vp;
    104 	fsid_t fsid;
    105 	void *start;
    106 	ino_t lastino;
    107 	daddr_t b_daddr, v_daddr;
    108 	u_long bsize;
    109 	int cnt, error;
    110 
    111 	if (error = suser(p->p_ucred, &p->p_acflag))
    112 		return (error);
    113 
    114 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    115 		return (error);
    116 	if ((mntp = getvfs(&fsid)) == NULL)
    117 		return (EINVAL);
    118 
    119 	cnt = SCARG(uap, blkcnt);
    120 	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    121 	if (error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO)))
    122 		goto err1;
    123 
    124 	/* Mark blocks/inodes dirty.  */
    125 	fs = VFSTOUFS(mntp)->um_lfs;
    126 	bsize = fs->lfs_bsize;
    127 	error = 0;
    128 
    129 	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
    130 	sp = fs->lfs_sp;
    131 	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
    132 	    blkp = start; cnt--; ++blkp) {
    133 		/*
    134 		 * Get the IFILE entry (only once) and see if the file still
    135 		 * exists.
    136 		 */
    137 		if (lastino != blkp->bi_inode) {
    138 			if (lastino != LFS_UNUSED_INUM) {
    139 				/* Finish up last file */
    140 				if (sp->fip->fi_nblocks == 0) {
    141 					DEC_FINFO(sp);
    142 					sp->sum_bytes_left +=
    143 					    sizeof(FINFO) - sizeof(daddr_t);
    144 				} else {
    145 					lfs_updatemeta(sp);
    146 					BUMP_FIP(sp);
    147 				}
    148 
    149 				lfs_writeinode(fs, sp, ip);
    150 				lfs_vunref(vp);
    151 			}
    152 
    153 			/* Start a new file */
    154 			CHECK_SEG(sizeof(FINFO));
    155 			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
    156 			INC_FINFO(sp);
    157 			sp->start_lbp = &sp->fip->fi_blocks[0];
    158 			sp->vp = NULL;
    159 			sp->fip->fi_version = blkp->bi_version;
    160 			sp->fip->fi_nblocks = 0;
    161 			sp->fip->fi_ino = blkp->bi_inode;
    162 			lastino = blkp->bi_inode;
    163 			if (blkp->bi_inode == LFS_IFILE_INUM)
    164 				v_daddr = fs->lfs_idaddr;
    165 			else {
    166 				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
    167 				v_daddr = ifp->if_daddr;
    168 				brelse(bp);
    169 			}
    170 			if (v_daddr == LFS_UNUSED_DADDR)
    171 				continue;
    172 
    173 			/* Get the vnode/inode. */
    174 			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
    175 			    blkp->bi_lbn == LFS_UNUSED_LBN ?
    176 			    blkp->bi_bp : NULL)) {
    177 #ifdef DIAGNOSTIC
    178 				printf("lfs_markv: VFS_VGET failed (%d)\n",
    179 				    blkp->bi_inode);
    180 #endif
    181 				lastino = LFS_UNUSED_INUM;
    182 				v_daddr = LFS_UNUSED_DADDR;
    183 				continue;
    184 			}
    185 			sp->vp = vp;
    186 			ip = VTOI(vp);
    187 		} else if (v_daddr == LFS_UNUSED_DADDR)
    188 			continue;
    189 
    190 		/* If this BLOCK_INFO didn't contain a block, keep going. */
    191 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    192 			continue;
    193 		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
    194 		    b_daddr != blkp->bi_daddr)
    195 			continue;
    196 		/*
    197 		 * If we got to here, then we are keeping the block.  If it
    198 		 * is an indirect block, we want to actually put it in the
    199 		 * buffer cache so that it can be updated in the finish_meta
    200 		 * section.  If it's not, we need to allocate a fake buffer
    201 		 * so that writeseg can perform the copyin and write the buffer.
    202 		 */
    203 		if (blkp->bi_lbn >= 0)	/* Data Block */
    204 			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
    205 			    blkp->bi_bp);
    206 		else {
    207 			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
    208 			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
    209 			    (error = copyin(blkp->bi_bp, bp->b_data,
    210 			    bsize)))
    211 				goto err2;
    212 			if (error = VOP_BWRITE(bp))
    213 				goto err2;
    214 		}
    215 		while (lfs_gatherblock(sp, bp, NULL));
    216 	}
    217 	if (sp->vp) {
    218 		if (sp->fip->fi_nblocks == 0) {
    219 			DEC_FINFO(sp);
    220 			sp->sum_bytes_left +=
    221 			    sizeof(FINFO) - sizeof(daddr_t);
    222 		} else
    223 			lfs_updatemeta(sp);
    224 
    225 		lfs_writeinode(fs, sp, ip);
    226 		lfs_vunref(vp);
    227 	}
    228 	(void) lfs_writeseg(fs, sp);
    229 	lfs_segunlock(fs);
    230 	free(start, M_SEGMENT);
    231 	return (error);
    232 
    233 /*
    234  * XXX
    235  * If we come in to error 2, we might have indirect blocks that were
    236  * updated and now have bad block pointers.  I don't know what to do
    237  * about this.
    238  */
    239 
    240 err2:	lfs_vunref(vp);
    241 	/* Free up fakebuffers */
    242 	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
    243 		if ((*bpp)->b_flags & B_CALL) {
    244 			brelvp(*bpp);
    245 			free(*bpp, M_SEGMENT);
    246 		} else
    247 			brelse(*bpp);
    248 	lfs_segunlock(fs);
    249 err1:
    250 	free(start, M_SEGMENT);
    251 	return (error);
    252 }
    253 
    254 /*
    255  * lfs_bmapv:
    256  *
    257  * This will fill in the current disk address for arrays of blocks.
    258  *
    259  *  0 on success
    260  * -1/errno is return on error.
    261  */
    262 int
    263 lfs_bmapv(p, v, retval)
    264 	struct proc *p;
    265 	void *v;
    266 	register_t *retval;
    267 {
    268 	struct lfs_bmapv_args /* {
    269 		syscallarg(fsid_t *) fsidp;
    270 		syscallarg(struct block_info *) blkiov;
    271 		syscallarg(int) blkcnt;
    272 	} */ *uap = v;
    273 	BLOCK_INFO *blkp;
    274 	struct mount *mntp;
    275 	struct vnode *vp;
    276 	fsid_t fsid;
    277 	void *start;
    278 	daddr_t daddr;
    279 	int cnt, error, step;
    280 
    281 	if (error = suser(p->p_ucred, &p->p_acflag))
    282 		return (error);
    283 
    284 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    285 		return (error);
    286 	if ((mntp = getvfs(&fsid)) == NULL)
    287 		return (EINVAL);
    288 
    289 	cnt = SCARG(uap, blkcnt);
    290 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    291 	if (error = copyin(SCARG(uap, blkiov), blkp,
    292 	    cnt * sizeof(BLOCK_INFO))) {
    293 		free(blkp, M_SEGMENT);
    294 		return (error);
    295 	}
    296 
    297 	for (step = cnt; step--; ++blkp) {
    298 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    299 			continue;
    300 		/* Could be a deadlock ? */
    301 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
    302 			daddr = LFS_UNUSED_DADDR;
    303 		else {
    304 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
    305 				daddr = LFS_UNUSED_DADDR;
    306 			vput(vp);
    307 		}
    308 		blkp->bi_daddr = daddr;
    309         }
    310 	copyout(start, SCARG(uap, blkiov), cnt * sizeof(BLOCK_INFO));
    311 	free(start, M_SEGMENT);
    312 	return (0);
    313 }
    314 
    315 /*
    316  * lfs_segclean:
    317  *
    318  * Mark the segment clean.
    319  *
    320  *  0 on success
    321  * -1/errno is return on error.
    322  */
    323 int
    324 lfs_segclean(p, v, retval)
    325 	struct proc *p;
    326 	void *v;
    327 	register_t *retval;
    328 {
    329 	struct lfs_segclean_args /* {
    330 		syscallarg(fsid_t *) fsidp;
    331 		syscallarg(u_long) segment;
    332 	} */ *uap = v;
    333 	CLEANERINFO *cip;
    334 	SEGUSE *sup;
    335 	struct buf *bp;
    336 	struct mount *mntp;
    337 	struct lfs *fs;
    338 	fsid_t fsid;
    339 	int error;
    340 
    341 	if (error = suser(p->p_ucred, &p->p_acflag))
    342 		return (error);
    343 
    344 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    345 		return (error);
    346 	if ((mntp = getvfs(&fsid)) == NULL)
    347 		return (EINVAL);
    348 
    349 	fs = VFSTOUFS(mntp)->um_lfs;
    350 
    351 	if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
    352 		return (EBUSY);
    353 
    354 	LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
    355 	if (sup->su_flags & SEGUSE_ACTIVE) {
    356 		brelse(bp);
    357 		return (EBUSY);
    358 	}
    359 	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
    360 	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
    361 	    sup->su_ninos * btodb(fs->lfs_bsize);
    362 	sup->su_flags &= ~SEGUSE_DIRTY;
    363 	(void) VOP_BWRITE(bp);
    364 
    365 	LFS_CLEANERINFO(cip, fs, bp);
    366 	++cip->clean;
    367 	--cip->dirty;
    368 	(void) VOP_BWRITE(bp);
    369 	wakeup(&fs->lfs_avail);
    370 	return (0);
    371 }
    372 
    373 /*
    374  * lfs_segwait:
    375  *
    376  * This will block until a segment in file system fsid is written.  A timeout
    377  * in milliseconds may be specified which will awake the cleaner automatically.
    378  * An fsid of -1 means any file system, and a timeout of 0 means forever.
    379  *
    380  *  0 on success
    381  *  1 on timeout
    382  * -1/errno is return on error.
    383  */
    384 int
    385 lfs_segwait(p, v, retval)
    386 	struct proc *p;
    387 	void *v;
    388 	register_t *retval;
    389 {
    390 	struct lfs_segwait_args /* {
    391 		syscallarg(fsid_t *) fsidp;
    392 		syscallarg(struct timeval *) tv;
    393 	} */ *uap = v;
    394 	extern int lfs_allclean_wakeup;
    395 	struct mount *mntp;
    396 	struct timeval atv;
    397 	fsid_t fsid;
    398 	void *addr;
    399 	u_long timeout;
    400 	int error, s;
    401 
    402 	if (error = suser(p->p_ucred, &p->p_acflag)) {
    403 		return (error);
    404 }
    405 #ifdef WHEN_QUADS_WORK
    406 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    407 		return (error);
    408 	if (fsid == (fsid_t)-1)
    409 		addr = &lfs_allclean_wakeup;
    410 	else {
    411 		if ((mntp = getvfs(&fsid)) == NULL)
    412 			return (EINVAL);
    413 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    414 	}
    415 #else
    416 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    417 		return (error);
    418 	if ((mntp = getvfs(&fsid)) == NULL)
    419 		addr = &lfs_allclean_wakeup;
    420 	else
    421 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    422 #endif
    423 
    424 	if (SCARG(uap, tv)) {
    425 		if (error =
    426 		    copyin(SCARG(uap, tv), &atv, sizeof(struct timeval)))
    427 			return (error);
    428 		if (itimerfix(&atv))
    429 			return (EINVAL);
    430 		s = splclock();
    431 		timeradd(&atv, &time, &atv);
    432 		timeout = hzto(&atv);
    433 		splx(s);
    434 	} else
    435 		timeout = 0;
    436 
    437 	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
    438 	return (error == ERESTART ? EINTR : 0);
    439 }
    440 
    441 /*
    442  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
    443  * daddr from the ifile, so don't look it up again.  If the cleaner is
    444  * processing IINFO structures, it may have the ondisk inode already, so
    445  * don't go retrieving it again.
    446  */
    447 int
    448 lfs_fastvget(mp, ino, daddr, vpp, dinp)
    449 	struct mount *mp;
    450 	ino_t ino;
    451 	daddr_t daddr;
    452 	struct vnode **vpp;
    453 	struct dinode *dinp;
    454 {
    455 	register struct inode *ip;
    456 	struct vnode *vp;
    457 	struct ufsmount *ump;
    458 	struct buf *bp;
    459 	dev_t dev;
    460 	int error;
    461 
    462 	ump = VFSTOUFS(mp);
    463 	dev = ump->um_dev;
    464 	/*
    465 	 * This is playing fast and loose.  Someone may have the inode
    466 	 * locked, in which case they are going to be distinctly unhappy
    467 	 * if we trash something.
    468 	 */
    469 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
    470 		lfs_vref(*vpp);
    471 		if ((*vpp)->v_flag & VXLOCK)
    472 			printf ("Cleaned vnode VXLOCKED\n");
    473 		ip = VTOI(*vpp);
    474 		if (ip->i_flag & IN_LOCKED)
    475 			printf("cleaned vnode locked\n");
    476 		if (!(ip->i_flag & IN_MODIFIED)) {
    477 			++ump->um_lfs->lfs_uinodes;
    478 			ip->i_flag |= IN_MODIFIED;
    479 		}
    480 		ip->i_flag |= IN_MODIFIED;
    481 		return (0);
    482 	}
    483 
    484 	/* Allocate new vnode/inode. */
    485 	if (error = lfs_vcreate(mp, ino, &vp)) {
    486 		*vpp = NULL;
    487 		return (error);
    488 	}
    489 
    490 	/*
    491 	 * Put it onto its hash chain and lock it so that other requests for
    492 	 * this inode will block if they arrive while we are sleeping waiting
    493 	 * for old data structures to be purged or for the contents of the
    494 	 * disk portion of this inode to be read.
    495 	 */
    496 	ip = VTOI(vp);
    497 	ufs_ihashins(ip);
    498 
    499 	/*
    500 	 * XXX
    501 	 * This may not need to be here, logically it should go down with
    502 	 * the i_devvp initialization.
    503 	 * Ask Kirk.
    504 	 */
    505 	ip->i_lfs = ump->um_lfs;
    506 
    507 	/* Read in the disk contents for the inode, copy into the inode. */
    508 	if (dinp)
    509 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
    510 			return (error);
    511 	else {
    512 		if (error = bread(ump->um_devvp, daddr,
    513 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
    514 			/*
    515 			 * The inode does not contain anything useful, so it
    516 			 * would be misleading to leave it on its hash chain.
    517 			 * Iput() will return it to the free list.
    518 			 */
    519 			ufs_ihashrem(ip);
    520 
    521 			/* Unlock and discard unneeded inode. */
    522 			lfs_vunref(vp);
    523 			brelse(bp);
    524 			*vpp = NULL;
    525 			return (error);
    526 		}
    527 		ip->i_din =
    528 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
    529 		brelse(bp);
    530 	}
    531 
    532 	/* Inode was just read from user space or disk, make sure it's locked */
    533 	ip->i_flag |= IN_LOCKED;
    534 
    535 	/*
    536 	 * Initialize the vnode from the inode, check for aliases.  In all
    537 	 * cases re-init ip, the underlying vnode/inode may have changed.
    538 	 */
    539 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
    540 		lfs_vunref(vp);
    541 		*vpp = NULL;
    542 		return (error);
    543 	}
    544 	/*
    545 	 * Finish inode initialization now that aliasing has been resolved.
    546 	 */
    547 	ip->i_devvp = ump->um_devvp;
    548 	ip->i_flag |= IN_MODIFIED;
    549 	++ump->um_lfs->lfs_uinodes;
    550 	VREF(ip->i_devvp);
    551 	*vpp = vp;
    552 	return (0);
    553 }
    554 struct buf *
    555 lfs_fakebuf(vp, lbn, size, uaddr)
    556 	struct vnode *vp;
    557 	int lbn;
    558 	size_t size;
    559 	caddr_t uaddr;
    560 {
    561 	struct buf *bp;
    562 
    563 	bp = lfs_newbuf(vp, lbn, 0);
    564 	bp->b_saveaddr = uaddr;
    565 	bp->b_bufsize = size;
    566 	bp->b_bcount = size;
    567 	bp->b_flags |= B_INVAL;
    568 	return (bp);
    569 }
    570