/*	$NetBSD: genfs_vnops.c,v 1.11.4.3 1999/07/12 02:40:30 chs Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_nfsserver.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/poll.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <uvm/uvm.h>

#ifdef NFSSERVER
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>
#endif

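/*
 * generic poll routine: the vnode is always considered ready for i/o,
 * so just report whichever of the normal read/write events were requested.
 */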
int
genfs_poll(v)
	void *v;
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct proc *a_p;
	} */ *ap = v;

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

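/*
 * generic fsync routine: flush the vnode's dirty buffers (waiting for
 * completion if FSYNC_WAIT is set), then update the on-disk metadata
 * with VOP_UPDATE unless only the file data was requested (FSYNC_DATAONLY).
 */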
int
genfs_fsync(v)
	void *v;
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;
	register struct vnode *vp = ap->a_vp;
	int wait;

	wait = (ap->a_flags & FSYNC_WAIT) != 0;
	vflushbuf(vp, wait);
	if ((ap->a_flags & FSYNC_DATAONLY) != 0)
		return (0);
	else
		return (VOP_UPDATE(ap->a_vp, NULL, NULL, wait));
}

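/*
 * generic seek routine: reject only negative offsets.
 */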
int
genfs_seek(v)
	void *v;
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t a_oldoff;
		off_t a_newoff;
		struct ucred *a_ucred;
	} */ *ap = v;

	if (ap->a_newoff < 0)
		return (EINVAL);

	return (0);
}

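/*
 * generic abortop routine: release the pathname buffer if the lookup
 * allocated one (HASBUF) and the caller does not still need it (SAVESTART).
 */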
int
genfs_abortop(v)
	void *v;
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
		FREE(ap->a_cnp->cn_pnbuf, M_NAMEI);
	return (0);
}

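/*
 * Trivial stubs: genfs_badop panics (for operations that must never be
 * called on the vnode type in question), genfs_nullop succeeds without
 * doing anything, and the remaining stubs simply return the corresponding
 * errno (EINVAL, EOPNOTSUPP, EBADF, ENOTTY).
 */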
/*ARGSUSED*/
int
genfs_badop(v)
	void *v;
{

	panic("genfs: bad op");
}

/*ARGSUSED*/
int
genfs_nullop(v)
	void *v;
{

	return (0);
}

/*ARGSUSED*/
int
genfs_einval(v)
	void *v;
{

	return (EINVAL);
}

/*ARGSUSED*/
int
genfs_eopnotsupp(v)
	void *v;
{

	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_ebadf(v)
	void *v;
{

	return (EBADF);
}

/* ARGSUSED */
int
genfs_enoioctl(v)
	void *v;
{

	return (ENOTTY);
}


/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
genfs_revoke(v)
	void *v;
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("genfs_revoke: not revokeall");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}


/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
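/*
 * For example, a filesystem that does no per-node locking of its own
 * might point its lock-related vnodeop entries at these stubs
 * (hypothetical descriptor table fragment):
 *
 *	{ &vop_lock_desc, genfs_nolock },
 *	{ &vop_unlock_desc, genfs_nounlock },
 *	{ &vop_islocked_desc, genfs_noislocked },
 */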
int
genfs_nolock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;

#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink). Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	if (vp->v_vnlock == NULL) {
		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
			return (0);
		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
		    M_VNODE, M_WAITOK);
		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	}
	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	if (flags & LK_INTERLOCK)
		vnflags |= LK_INTERLOCK;
	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock));
#else /* for now */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
#endif
}

/*
 * Decrement the active use count.
 */
int
genfs_nounlock(v)
	void *v;
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL));
}

/*
 * Return whether or not the node is in use.
 */
int
genfs_noislocked(v)
	void *v;
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockstatus(vp->v_vnlock));
}

/*
 * Local lease check for NFS servers.  Just set up args and let
 * nqsrv_getlease() do the rest.  If NFSSERVER is not in the kernel,
 * this is a null operation.
 */
int
genfs_lease_check(v)
	void *v;
{
#ifdef NFSSERVER
	struct vop_lease_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
		struct ucred *a_cred;
		int a_flag;
	} */ *ap = v;
	u_int32_t duration = 0;
	int cache;
	u_quad_t frev;

	(void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
	    NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
	return (0);
#else
	return (0);
#endif /* NFSSERVER */
}


/*
 * generic VM getpages routine.
 * Return PG_BUSY pages for the given range,
 * reading from backing store if necessary.
 */
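/*
 * For PGO_LOCKED requests only pages already resident are returned.
 * Otherwise the fault is expanded to cover at least one filesystem block,
 * missing pages are read via VOP_BMAP/VOP_STRATEGY and holes are
 * zero-filled.  Asynchronous requests (no PGO_SYNCIO) return EINPROGRESS
 * and complete through uvm_aio_biodone().
 */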
int
genfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vaddr_t a_offset;
		vm_page_t *a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	off_t offset, origoffset, startoffset;
	daddr_t lbn, blkno;
	int s, i, error, npages, run, cidx, pidx, pcount;
	int bsize, bshift, dev_bshift, dev_bsize;
	int flags = ap->a_flags;
	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
	boolean_t sawhole = FALSE;
	char *kva;
	struct buf *bp, *mbp;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct vm_page *pg, *pgs[16];			/* XXX 16 */
	struct ucred *cred = curproc->p_ucred;		/* XXX curproc */
	UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);

#ifdef DIAGNOSTIC
	if (ap->a_centeridx < 0 || ap->a_centeridx > *ap->a_count) {
		panic("genfs_getpages: centeridx %d out of range",
		      ap->a_centeridx);
	}
	if (ap->a_offset & (PAGE_SIZE - 1)) {
		panic("genfs_getpages: offset 0x%x", (int)ap->a_offset);
	}
#endif

	/*
	 * Bounds-check the request.
	 */

	if (ap->a_offset >= vp->v_uvm.u_size) {
		if ((flags & PGO_LOCKED) == 0) {
			simple_unlock(&uobj->vmobjlock);
		}
		UVMHIST_LOG(ubchist, "off 0x%x past EOF 0x%x",
			    (int)ap->a_offset, (int)vp->v_uvm.u_size,0,0);
		return EINVAL;
	}

	/*
	 * For PGO_LOCKED requests, just return whatever's in memory.
	 */

	if (flags & PGO_LOCKED) {
		uvn_findpages(uobj, ap->a_offset, ap->a_count, ap->a_m,
			      UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);

		return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
	}

	/* vnode is VOP_LOCKed, uobj is locked */

	error = 0;

	/*
	 * find our center page and make some simple checks.
	 */

	origoffset = ap->a_offset + (ap->a_centeridx << PAGE_SHIFT);
	pg = NULL;
	npages = 1;
	uvn_findpages(uobj, origoffset, &npages, &pg, 0);

	/*
	 * if PGO_OVERWRITE is set, don't bother reading the page.
	 * PGO_OVERWRITE also means that the caller guarantees
	 * that the page already has backing store allocated.
	 */

	if (flags & PGO_OVERWRITE) {
		UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);

		/* XXX for now, zero the page if we allocated it */
		if (pg->flags & PG_FAKE) {
			uvm_pagezero(pg);
		}

		simple_unlock(&uobj->vmobjlock);
		pgs[0] = pg;
		goto out;
	}

	/*
	 * if the page is already resident, just return it.
	 */

	if ((pg->flags & PG_FAKE) == 0 &&
	    !((ap->a_access_type & VM_PROT_WRITE) &&
	      (pg->flags & PG_RDONLY))) {
		UVMHIST_LOG(ubchist, "returning cached pg %p", pg,0,0,0);

		simple_unlock(&uobj->vmobjlock);
		pgs[0] = pg;
		goto out;
	}

	/*
	 * the page wasn't resident and we're not overwriting,
	 * so we're going to have to do some i/o.
	 * expand the fault to cover at least 1 block.
	 */

	bshift = vp->v_mount->mnt_fs_bshift;
	bsize = 1 << bshift;
	dev_bshift = vp->v_mount->mnt_dev_bshift;
	dev_bsize = 1 << dev_bshift;

	startoffset = offset = origoffset & ~(bsize - 1);
	cidx = (origoffset - offset) >> PAGE_SHIFT;
	npages = max(*ap->a_count + cidx, bsize >> PAGE_SHIFT);

	if (npages == 1) {
		pgs[0] = pg;
	} else {
		int n = npages;
		memset(pgs, 0, sizeof(pgs));
		pgs[cidx] = PGO_DONTCARE;
		uvn_findpages(uobj, offset, &n, pgs, 0);
		pgs[cidx] = pg;
	}
	simple_unlock(&uobj->vmobjlock);

	/*
	 * read the desired page(s).
	 */

	totalbytes = npages << PAGE_SHIFT;
	bytes = min(totalbytes, (vp->v_uvm.u_size - offset + dev_bsize - 1) &
		    ~(dev_bsize - 1));
	tailbytes = totalbytes - bytes;
	skipbytes = 0;

	kva = (void *)uvm_pagermapin(pgs, npages, M_WAITOK);

	s = splbio();
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	mbp->b_bufsize = bytes;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_READ| (flags & PGO_SYNCIO ? 0 : B_CALL);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;

	bp = NULL;
	for (; bytes > 0; offset += iobytes, bytes -= iobytes) {

		/*
		 * skip pages which don't need to be read.
		 */

		pidx = (offset - startoffset) >> PAGE_SHIFT;
		while ((pgs[pidx]->flags & PG_FAKE) == 0) {
			size_t b;

			if (offset & (PAGE_SIZE - 1)) {
				panic("genfs_getpages: skipping from middle "
				      "of page");
			}

			b = min(PAGE_SIZE, bytes);
			offset += b;
			bytes -= b;
			skipbytes += b;
			pidx++;
			if (bytes == 0) {
				goto loopdone;
			}
		}

		/*
		 * bmap the file to find out the blkno to read from and
		 * how much we can read in one i/o.
		 */

		lbn = offset >> bshift;
		error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
				    lbn, error,0,0);
			goto looperr;
		}

		/*
		 * see how many pages need to be read with this i/o.
		 * reduce the i/o size if necessary.
		 */

		iobytes = min(((lbn + 1 + run) << bshift) - offset, bytes);
		if (offset + iobytes > round_page(offset)) {
			pcount = 1;
			while (pidx + pcount < npages &&
			       pgs[pidx + pcount]->flags & PG_FAKE) {
				pcount++;
			}
			iobytes = min(iobytes, (pcount << PAGE_SHIFT) -
				      (offset - trunc_page(offset)));
		}

		/*
		 * if this block isn't allocated, zero it instead of reading it.
		 */

		if (blkno == (daddr_t)-1) {
			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);

			sawhole = TRUE;
			memset(kva + (offset - startoffset), 0,
			       min(1 << bshift, (npages << PAGE_SHIFT) -
				   (offset - startoffset)));
			continue;
		}

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (bp == NULL && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			bp = pool_get(&bufpool, PR_WAITOK);
			splx(s);
			bp->b_data = (void *)(kva + offset - startoffset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_READ|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - (lbn << bshift)) >>
				       dev_bshift);

		UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
			    bp, (int)offset, (int)iobytes, bp->b_blkno);

		VOP_STRATEGY(bp);
	}
loopdone:

	if (skipbytes) {
		s = splbio();
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}

	/*
	 * if EOF is in the middle of this page, zero the part past EOF.
	 */

	if (tailbytes > 0) {
		memset(kva + (offset - startoffset), 0, tailbytes);
	}

looperr:
	if ((flags & PGO_SYNCIO) == 0) {
		UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
		return EINPROGRESS;
	}
	if (bp != NULL) {
		error = biowait(mbp);
	}
	s = splbio();
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout((vaddr_t)kva, npages);

	/*
	 * if we encountered a hole then we have to do a little more work.
	 * for read faults, we must mark the page PG_RDONLY so that future
	 * write accesses to the page will fault again.
	 * for write faults, we must make sure that the backing store for
	 * the page is completely allocated.
	 */

	if (sawhole) {
		if ((ap->a_access_type & VM_PROT_WRITE) == 0) {
			pg->flags |= PG_RDONLY;
			UVMHIST_LOG(ubchist, "setting PG_RDONLY",
				    0,0,0,0);
		} else {
			/* XXX loop VOP_BALLOC() over the page/block */
			error = VOP_BALLOC(vp, offset, bsize, cred, 0, NULL);
			if (error) {
				UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d",
					    lbn, error,0,0);
				goto out;
			}
		}
	}

	/*
	 * see if we want to start any readahead.
	 * XXX writeme
	 */

	/*
	 * we're almost done!  release the pages...
	 * for errors, we free the pages.
	 * otherwise we activate them and mark them as valid and clean.
	 * also, unbusy all but the center page.
	 */

out:
	if (error) {
		simple_lock(&uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
				    pgs[i], pgs[i]->flags, 0,0);
			if (pgs[i]->flags & PG_FAKE) {
				if (pgs[i]->flags & PG_WANTED) {
					wakeup(pgs[i]);
				}
				uvm_pagefree(pgs[i]);
			}
		}
		simple_unlock(&uobj->vmobjlock);
		UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
		return error;
	}

	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
	simple_lock(&uobj->vmobjlock);
	for (i = 0; i < npages; i++) {
		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
			    pgs[i], pgs[i]->flags, 0,0);
		if (pgs[i]->flags & PG_FAKE) {
			UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
				    pgs[i], (int)pgs[i]->offset,0,0);
			pgs[i]->flags &= ~(PG_FAKE);
			pmap_clear_modify(PMAP_PGARG(pgs[i]));
			pmap_clear_reference(PMAP_PGARG(pgs[i]));
		}
		if (pgs[i] != pg) {
			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
				    pgs[i], (int)pgs[i]->offset,0,0);
			/*
			KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
			*/

			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pgs[i], NULL);
		}
	}
	simple_unlock(&uobj->vmobjlock);
	ap->a_m[ap->a_centeridx] = pg;
	return 0;
}

/*
 * generic VM putpages routine.
 * Write the given range of pages to backing store.
 */
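/*
 * The pages are expected to be contiguous in the file starting at
 * a_m[0]->offset and must already have backing store allocated.
 * They are mapped with uvm_pagermapin() and written via
 * VOP_BMAP/VOP_STRATEGY; asynchronous requests (a_sync == 0) return
 * EINPROGRESS and complete through uvm_aio_biodone().
 */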
int
genfs_putpages(v)
	void *v;
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		struct vm_page **a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
	} */ *ap = v;

	int s, error, bshift, dev_bshift, dev_bsize, run;
	vaddr_t kva;
	off_t offset;
	size_t bytes, iobytes;
	daddr_t lbn, blkno;
	struct vm_page *pg;
	struct buf *mbp, *bp;
	struct vnode *vp = ap->a_vp;
	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);

	error = 0;
	bshift = vp->v_mount->mnt_fs_bshift;
	dev_bshift = vp->v_mount->mnt_dev_bshift;
	dev_bsize = 1 << dev_bshift;

	pg = ap->a_m[0];
	offset = pg->offset;
	bytes = min(ap->a_count << PAGE_SHIFT,
		    (vp->v_uvm.u_size - offset + dev_bsize - 1) &
		    ~(dev_bsize - 1));

	kva = uvm_pagermapin(ap->a_m, ap->a_count, M_WAITOK);

	s = splbio();
	vp->v_numoutput++;
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	mbp->b_bufsize = ap->a_count << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_WRITE| (ap->a_sync ? 0 : B_CALL) |
		(curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);

	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;

	bp = NULL;
	for (; bytes > 0; offset += iobytes, bytes -= iobytes) {
		lbn = offset >> bshift;
		error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
			goto errout;
		}

		/* this could be ifdef DIAGNOSTIC, but it's really important */
		if (blkno == (daddr_t)-1) {
			panic("genfs_putpages: no backing store "
			      "vp %p off 0x%x lbn 0x%x",
			      vp, (int)offset, lbn);
		}

		/* if it's really one i/o, don't make a second buf */
		iobytes = min(((lbn + 1 + run) << bshift) - offset, bytes);
		if (bp == NULL && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			vp->v_numoutput++;
			bp = pool_get(&bufpool, PR_WAITOK);
			splx(s);
			bp->b_data = (char *)kva +
				(vsize_t)(offset - pg->offset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_WRITE|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - (lbn << bshift)) >>
				       dev_bshift);
		UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
			    vp, (int)offset, (int)iobytes, bp->b_blkno);

		VOP_STRATEGY(bp);
	}

	if (!ap->a_sync) {
		return EINPROGRESS;
	}

errout:
	if (bp != NULL) {
		error = biowait(mbp);
	}
	s = splbio();
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, ap->a_count);
	UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);

	return error;
}