/* lfs_syscalls.c revision 1.14 -- NetBSD source, sys/ufs/lfs/ */
      1 /*	$NetBSD: lfs_syscalls.c,v 1.14 1998/01/13 03:30:39 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)lfs_syscalls.c	8.6 (Berkeley) 6/16/94
     36  */
     37 
     38 #include <sys/param.h>
     39 #include <sys/systm.h>
     40 #include <sys/proc.h>
     41 #include <sys/buf.h>
     42 #include <sys/mount.h>
     43 #include <sys/vnode.h>
     44 #include <sys/malloc.h>
     45 #include <sys/kernel.h>
     46 
     47 #include <sys/syscallargs.h>
     48 
     49 #include <ufs/ufs/quota.h>
     50 #include <ufs/ufs/inode.h>
     51 #include <ufs/ufs/ufsmount.h>
     52 #include <ufs/ufs/ufs_extern.h>
     53 
     54 #include <ufs/lfs/lfs.h>
     55 #include <ufs/lfs/lfs_extern.h>
     56 
     57 #define BUMP_FIP(SP) \
     58 	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])
     59 
     60 #define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
     61 #define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo
     62 
     63 /*
     64  * Before committing to add something to a segment summary, make sure there
     65  * is enough room.  S is the bytes added to the summary.
     66  */
     67 #define	CHECK_SEG(s)			\
     68 if (sp->sum_bytes_left < (s)) {		\
     69 	(void) lfs_writeseg(fs, sp);	\
     70 }
     71 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
     72 
/*
 * lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is return on error.
 */
int
lfs_markv(p, v, retval)
	struct proc *p;		/* calling process; must pass suser() */
	void *v;		/* actually a struct lfs_markv_args * */
	register_t *retval;	/* unused */
{
	struct lfs_markv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	struct segment *sp;	/* in-core segment being assembled */
	BLOCK_INFO *blkp;	/* cursor into the copied-in array */
	IFILE *ifp;
	struct buf *bp, **bpp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
	fsid_t fsid;
	void *start;		/* base of the malloc'd BLOCK_INFO array */
	ino_t lastino;		/* inode of previous entry; entries are grouped by inode */
	daddr_t b_daddr, v_daddr;
	u_long bsize;
	int cnt, error;

	/* Only the superuser (i.e. the cleaner) may call this. */
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = getvfs(&fsid)) == NULL)
		return (EINVAL);

	/*
	 * NOTE(review): cnt comes straight from user space and is not
	 * range-checked before being used to size the malloc below; the
	 * suser() check above is the only guard.  Verify whether a bound
	 * should be enforced here.
	 */
	cnt = SCARG(uap, blkcnt);
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO));
	if (error)
		goto err1;

	/* Mark blocks/inodes dirty.  */
	fs = VFSTOUFS(mntp)->um_lfs;
	bsize = fs->lfs_bsize;
	error = 0;

	/* Hold the segment lock for the duration of the cleaning write. */
	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
	sp = fs->lfs_sp;
	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
	    blkp = start; cnt--; ++blkp) {
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			if (lastino != LFS_UNUSED_INUM) {
				/* Finish up last file */
				if (sp->fip->fi_nblocks == 0) {
					/*
					 * No blocks were actually gathered
					 * for it: back out the FINFO and
					 * reclaim the summary bytes.
					 */
					DEC_FINFO(sp);
					sp->sum_bytes_left +=
					    sizeof(FINFO) - sizeof(daddr_t);
				} else {
					lfs_updatemeta(sp);
					BUMP_FIP(sp);
				}

				lfs_writeinode(fs, sp, ip);
				lfs_vunref(vp);
			}

			/* Start a new file */
			CHECK_SEG(sizeof(FINFO));
			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
			INC_FINFO(sp);
			sp->start_lbp = &sp->fip->fi_blocks[0];
			sp->vp = NULL;
			sp->fip->fi_version = blkp->bi_version;
			sp->fip->fi_nblocks = 0;
			sp->fip->fi_ino = blkp->bi_inode;
			lastino = blkp->bi_inode;
			/* The ifile's own address is kept in the superblock. */
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* File was deleted since the cleaner sampled it. */
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
			    blkp->bi_lbn == LFS_UNUSED_LBN ?
			    blkp->bi_bp : NULL)) {
#ifdef DIAGNOSTIC
				printf("lfs_markv: VFS_VGET failed (%d)\n",
				    blkp->bi_inode);
#endif
				/* Skip every remaining entry for this inode. */
				lastino = LFS_UNUSED_INUM;
				v_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			sp->vp = vp;
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR)
			continue;

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		/*
		 * If the block has moved since the cleaner recorded its
		 * address, some other write already rescued it; skip it.
		 */
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    b_daddr != blkp->bi_daddr)
			continue;
		/*
		 * If we got to here, then we are keeping the block.  If it
		 * is an indirect block, we want to actually put it in the
		 * buffer cache so that it can be updated in the finish_meta
		 * section.  If it's not, we need to allocate a fake buffer
		 * so that writeseg can perform the copyin and write the buffer.
		 */
		if (blkp->bi_lbn >= 0)	/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
			    blkp->bi_bp);
		else {
			/* Indirect blocks have negative logical block numbers. */
			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
			    (error = copyin(blkp->bi_bp, bp->b_data,
			    bsize)))
				goto err2;
			if ((error = VOP_BWRITE(bp)) != 0)
				goto err2;
		}
		/*
		 * Loop until lfs_gatherblock() accepts the block (nonzero
		 * presumably means the segment filled and was flushed --
		 * verify against lfs_segment.c).
		 */
		while (lfs_gatherblock(sp, bp, NULL));
	}
	/* Finish up the final file, mirroring the per-file epilogue above. */
	if (sp->vp) {
		if (sp->fip->fi_nblocks == 0) {
			DEC_FINFO(sp);
			sp->sum_bytes_left +=
			    sizeof(FINFO) - sizeof(daddr_t);
		} else
			lfs_updatemeta(sp);

		lfs_writeinode(fs, sp, ip);
		lfs_vunref(vp);
	}
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
	free(start, M_SEGMENT);
	return (error);

/*
 * XXX
 * If we come in to error 2, we might have indirect blocks that were
 * updated and now have bad block pointers.  I don't know what to do
 * about this.
 */

err2:	lfs_vunref(vp);
	/* Free up fakebuffers */
	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
		if ((*bpp)->b_flags & B_CALL) {
			brelvp(*bpp);
			free(*bpp, M_SEGMENT);
		} else
			brelse(*bpp);
	lfs_segunlock(fs);
err1:
	free(start, M_SEGMENT);
	return (error);
}
    255 
    256 /*
    257  * lfs_bmapv:
    258  *
    259  * This will fill in the current disk address for arrays of blocks.
    260  *
    261  *  0 on success
    262  * -1/errno is return on error.
    263  */
    264 int
    265 lfs_bmapv(p, v, retval)
    266 	struct proc *p;
    267 	void *v;
    268 	register_t *retval;
    269 {
    270 	struct lfs_bmapv_args /* {
    271 		syscallarg(fsid_t *) fsidp;
    272 		syscallarg(struct block_info *) blkiov;
    273 		syscallarg(int) blkcnt;
    274 	} */ *uap = v;
    275 	BLOCK_INFO *blkp;
    276 	struct mount *mntp;
    277 	struct vnode *vp;
    278 	fsid_t fsid;
    279 	void *start;
    280 	daddr_t daddr;
    281 	int cnt, error, step;
    282 
    283 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    284 		return (error);
    285 
    286 	error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t));
    287 	if (error)
    288 		return (error);
    289 	if ((mntp = getvfs(&fsid)) == NULL)
    290 		return (EINVAL);
    291 
    292 	cnt = SCARG(uap, blkcnt);
    293 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    294 	error = copyin(SCARG(uap, blkiov), blkp, cnt * sizeof(BLOCK_INFO));
    295 	if (error) {
    296 		free(blkp, M_SEGMENT);
    297 		return (error);
    298 	}
    299 
    300 	for (step = cnt; step--; ++blkp) {
    301 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    302 			continue;
    303 		/* Could be a deadlock ? */
    304 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
    305 			daddr = LFS_UNUSED_DADDR;
    306 		else {
    307 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
    308 				daddr = LFS_UNUSED_DADDR;
    309 			vput(vp);
    310 		}
    311 		blkp->bi_daddr = daddr;
    312         }
    313 	copyout(start, SCARG(uap, blkiov), cnt * sizeof(BLOCK_INFO));
    314 	free(start, M_SEGMENT);
    315 	return (0);
    316 }
    317 
    318 /*
    319  * lfs_segclean:
    320  *
    321  * Mark the segment clean.
    322  *
    323  *  0 on success
    324  * -1/errno is return on error.
    325  */
    326 int
    327 lfs_segclean(p, v, retval)
    328 	struct proc *p;
    329 	void *v;
    330 	register_t *retval;
    331 {
    332 	struct lfs_segclean_args /* {
    333 		syscallarg(fsid_t *) fsidp;
    334 		syscallarg(u_long) segment;
    335 	} */ *uap = v;
    336 	CLEANERINFO *cip;
    337 	SEGUSE *sup;
    338 	struct buf *bp;
    339 	struct mount *mntp;
    340 	struct lfs *fs;
    341 	fsid_t fsid;
    342 	int error;
    343 
    344 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    345 		return (error);
    346 
    347 	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
    348 		return (error);
    349 	if ((mntp = getvfs(&fsid)) == NULL)
    350 		return (EINVAL);
    351 
    352 	fs = VFSTOUFS(mntp)->um_lfs;
    353 
    354 	if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
    355 		return (EBUSY);
    356 
    357 	LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
    358 	if (sup->su_flags & SEGUSE_ACTIVE) {
    359 		brelse(bp);
    360 		return (EBUSY);
    361 	}
    362 	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
    363 	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
    364 	    sup->su_ninos * btodb(fs->lfs_bsize);
    365 	sup->su_flags &= ~SEGUSE_DIRTY;
    366 	(void) VOP_BWRITE(bp);
    367 
    368 	LFS_CLEANERINFO(cip, fs, bp);
    369 	++cip->clean;
    370 	--cip->dirty;
    371 	(void) VOP_BWRITE(bp);
    372 	wakeup(&fs->lfs_avail);
    373 	return (0);
    374 }
    375 
    376 /*
    377  * lfs_segwait:
    378  *
    379  * This will block until a segment in file system fsid is written.  A timeout
    380  * in milliseconds may be specified which will awake the cleaner automatically.
    381  * An fsid of -1 means any file system, and a timeout of 0 means forever.
    382  *
    383  *  0 on success
    384  *  1 on timeout
    385  * -1/errno is return on error.
    386  */
    387 int
    388 lfs_segwait(p, v, retval)
    389 	struct proc *p;
    390 	void *v;
    391 	register_t *retval;
    392 {
    393 	struct lfs_segwait_args /* {
    394 		syscallarg(fsid_t *) fsidp;
    395 		syscallarg(struct timeval *) tv;
    396 	} */ *uap = v;
    397 	extern int lfs_allclean_wakeup;
    398 	struct mount *mntp;
    399 	struct timeval atv;
    400 	fsid_t fsid;
    401 	void *addr;
    402 	u_long timeout;
    403 	int error, s;
    404 
    405 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) {
    406 		return (error);
    407 }
    408 #ifdef WHEN_QUADS_WORK
    409 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    410 		return (error);
    411 	if (fsid == (fsid_t)-1)
    412 		addr = &lfs_allclean_wakeup;
    413 	else {
    414 		if ((mntp = getvfs(&fsid)) == NULL)
    415 			return (EINVAL);
    416 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    417 	}
    418 #else
    419 	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
    420 		return (error);
    421 	if ((mntp = getvfs(&fsid)) == NULL)
    422 		addr = &lfs_allclean_wakeup;
    423 	else
    424 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    425 #endif
    426 
    427 	if (SCARG(uap, tv)) {
    428 		error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
    429 		if (error)
    430 			return (error);
    431 		if (itimerfix(&atv))
    432 			return (EINVAL);
    433 		s = splclock();
    434 		timeradd(&atv, &time, &atv);
    435 		timeout = hzto(&atv);
    436 		splx(s);
    437 	} else
    438 		timeout = 0;
    439 
    440 	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
    441 	return (error == ERESTART ? EINTR : 0);
    442 }
    443 
    444 /*
    445  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
    446  * daddr from the ifile, so don't look it up again.  If the cleaner is
    447  * processing IINFO structures, it may have the ondisk inode already, so
    448  * don't go retrieving it again.
    449  */
    450 int
    451 lfs_fastvget(mp, ino, daddr, vpp, dinp)
    452 	struct mount *mp;
    453 	ino_t ino;
    454 	daddr_t daddr;
    455 	struct vnode **vpp;
    456 	struct dinode *dinp;
    457 {
    458 	register struct inode *ip;
    459 	struct vnode *vp;
    460 	struct ufsmount *ump;
    461 	struct buf *bp;
    462 	dev_t dev;
    463 	int error;
    464 
    465 	ump = VFSTOUFS(mp);
    466 	dev = ump->um_dev;
    467 	/*
    468 	 * This is playing fast and loose.  Someone may have the inode
    469 	 * locked, in which case they are going to be distinctly unhappy
    470 	 * if we trash something.
    471 	 */
    472 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
    473 		lfs_vref(*vpp);
    474 		if ((*vpp)->v_flag & VXLOCK)
    475 			printf ("Cleaned vnode VXLOCKED\n");
    476 		ip = VTOI(*vpp);
    477 		if (ip->i_flag & IN_LOCKED)
    478 			printf("cleaned vnode locked\n");
    479 		if (!(ip->i_flag & IN_MODIFIED)) {
    480 			++ump->um_lfs->lfs_uinodes;
    481 			ip->i_flag |= IN_MODIFIED;
    482 		}
    483 		ip->i_flag |= IN_MODIFIED;
    484 		return (0);
    485 	}
    486 
    487 	/* Allocate new vnode/inode. */
    488 	if ((error = lfs_vcreate(mp, ino, &vp)) != 0) {
    489 		*vpp = NULL;
    490 		return (error);
    491 	}
    492 
    493 	/*
    494 	 * Put it onto its hash chain and lock it so that other requests for
    495 	 * this inode will block if they arrive while we are sleeping waiting
    496 	 * for old data structures to be purged or for the contents of the
    497 	 * disk portion of this inode to be read.
    498 	 */
    499 	ip = VTOI(vp);
    500 	ufs_ihashins(ip);
    501 
    502 	/*
    503 	 * XXX
    504 	 * This may not need to be here, logically it should go down with
    505 	 * the i_devvp initialization.
    506 	 * Ask Kirk.
    507 	 */
    508 	ip->i_lfs = ump->um_lfs;
    509 
    510 	/* Read in the disk contents for the inode, copy into the inode. */
    511 	if (dinp) {
    512 		error = copyin(dinp, &ip->i_din.ffs_din, sizeof(struct dinode));
    513 		if (error)
    514 			return (error);
    515 	}
    516 	else {
    517 		error = bread(ump->um_devvp, daddr,
    518 			      (int)ump->um_lfs->lfs_bsize, NOCRED, &bp);
    519 		if (error) {
    520 			/*
    521 			 * The inode does not contain anything useful, so it
    522 			 * would be misleading to leave it on its hash chain.
    523 			 * Iput() will return it to the free list.
    524 			 */
    525 			ufs_ihashrem(ip);
    526 
    527 			/* Unlock and discard unneeded inode. */
    528 			lfs_vunref(vp);
    529 			brelse(bp);
    530 			*vpp = NULL;
    531 			return (error);
    532 		}
    533 		ip->i_din.ffs_din =
    534 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
    535 		brelse(bp);
    536 	}
    537 
    538 	/* Inode was just read from user space or disk, make sure it's locked */
    539 	ip->i_flag |= IN_LOCKED;
    540 
    541 	/*
    542 	 * Initialize the vnode from the inode, check for aliases.  In all
    543 	 * cases re-init ip, the underlying vnode/inode may have changed.
    544 	 */
    545 	error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp);
    546 	if (error) {
    547 		lfs_vunref(vp);
    548 		*vpp = NULL;
    549 		return (error);
    550 	}
    551 	/*
    552 	 * Finish inode initialization now that aliasing has been resolved.
    553 	 */
    554 	ip->i_devvp = ump->um_devvp;
    555 	ip->i_flag |= IN_MODIFIED;
    556 	++ump->um_lfs->lfs_uinodes;
    557 	VREF(ip->i_devvp);
    558 	*vpp = vp;
    559 	return (0);
    560 }
    561 struct buf *
    562 lfs_fakebuf(vp, lbn, size, uaddr)
    563 	struct vnode *vp;
    564 	int lbn;
    565 	size_t size;
    566 	caddr_t uaddr;
    567 {
    568 	struct buf *bp;
    569 
    570 	bp = lfs_newbuf(vp, lbn, 0);
    571 	bp->b_saveaddr = uaddr;
    572 	bp->b_bufsize = size;
    573 	bp->b_bcount = size;
    574 	bp->b_flags |= B_INVAL;
    575 	return (bp);
    576 }
    577