Home | History | Annotate | Line # | Download | only in client
nfs_clvnops.c revision 1.3
      1 /*	$NetBSD: nfs_clvnops.c,v 1.3 2016/11/18 08:31:30 pgoyette Exp $	*/
      2 /*-
      3  * Copyright (c) 1989, 1993
      4  *	The Regents of the University of California.  All rights reserved.
      5  *
      6  * This code is derived from software contributed to Berkeley by
      7  * Rick Macklem at The University of Guelph.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  *
     33  *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 /* __FBSDID("FreeBSD: head/sys/fs/nfsclient/nfs_clvnops.c 304026 2016-08-12 22:44:59Z rmacklem "); */
     38 __RCSID("$NetBSD: nfs_clvnops.c,v 1.3 2016/11/18 08:31:30 pgoyette Exp $");
     39 
     40 /*
     41  * vnode op calls for Sun NFS version 2, 3 and 4
     42  */
     43 
     44 #include "opt_inet.h"
     45 
     46 #include <sys/param.h>
     47 #include <sys/kernel.h>
     48 #include <sys/systm.h>
     49 #include <sys/resourcevar.h>
     50 #include <sys/proc.h>
     51 #include <sys/mount.h>
     52 #include <sys/bio.h>
     53 #include <sys/buf.h>
     54 #include <sys/jail.h>
     55 #include <sys/malloc.h>
     56 #include <sys/mbuf.h>
     57 #include <sys/namei.h>
     58 #include <sys/socket.h>
     59 #include <sys/vnode.h>
     60 #include <sys/dirent.h>
     61 #include <sys/fcntl.h>
     62 #include <sys/lockf.h>
     63 #include <sys/stat.h>
     64 #include <sys/sysctl.h>
     65 #include <sys/signalvar.h>
     66 
     67 #include <vm/vm.h>
     68 #include <vm/vm_extern.h>
     69 #include <vm/vm_object.h>
     70 
     71 #include <fs/nfs/nfsport.h>
     72 #include <fs/nfsclient/nfsnode.h>
     73 #include <fs/nfsclient/nfsmount.h>
     74 #include <fs/nfsclient/nfs.h>
     75 #include <fs/nfsclient/nfs_kdtrace.h>
     76 
     77 #include <net/if.h>
     78 #include <netinet/in.h>
     79 #include <netinet/in_var.h>
     80 
     81 #include <nfs/nfs_lock.h>
     82 
     83 #ifdef KDTRACE_HOOKS
     84 #include <sys/dtrace_bsd.h>
     85 
     86 dtrace_nfsclient_accesscache_flush_probe_func_t
     87 		dtrace_nfscl_accesscache_flush_done_probe;
     88 uint32_t	nfscl_accesscache_flush_done_id;
     89 
     90 dtrace_nfsclient_accesscache_get_probe_func_t
     91 		dtrace_nfscl_accesscache_get_hit_probe,
     92 		dtrace_nfscl_accesscache_get_miss_probe;
     93 uint32_t	nfscl_accesscache_get_hit_id;
     94 uint32_t	nfscl_accesscache_get_miss_id;
     95 
     96 dtrace_nfsclient_accesscache_load_probe_func_t
     97 		dtrace_nfscl_accesscache_load_done_probe;
     98 uint32_t	nfscl_accesscache_load_done_id;
     99 #endif /* KDTRACE_HOOKS */
    100 
    101 /* Defs */
    102 #define	TRUE	1
    103 #define	FALSE	0
    104 
    105 extern struct nfsstatsv1 nfsstatsv1;
    106 extern int nfsrv_useacl;
    107 extern int nfscl_debuglevel;
    108 MALLOC_DECLARE(M_NEWNFSREQ);
    109 
    110 /*
    111  * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
    112  * calls are not in getblk() and brelse() so that they would not be necessary
    113  * here.
    114  */
    115 #ifndef B_VMIO
    116 #define	vfs_busy_pages(bp, f)
    117 #endif
    118 
    119 static vop_read_t	nfsfifo_read;
    120 static vop_write_t	nfsfifo_write;
    121 static vop_close_t	nfsfifo_close;
    122 static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
    123 		    struct thread *);
    124 static vop_lookup_t	nfs_lookup;
    125 static vop_create_t	nfs_create;
    126 static vop_mknod_t	nfs_mknod;
    127 static vop_open_t	nfs_open;
    128 static vop_pathconf_t	nfs_pathconf;
    129 static vop_close_t	nfs_close;
    130 static vop_access_t	nfs_access;
    131 static vop_getattr_t	nfs_getattr;
    132 static vop_setattr_t	nfs_setattr;
    133 static vop_read_t	nfs_read;
    134 static vop_fsync_t	nfs_fsync;
    135 static vop_remove_t	nfs_remove;
    136 static vop_link_t	nfs_link;
    137 static vop_rename_t	nfs_rename;
    138 static vop_mkdir_t	nfs_mkdir;
    139 static vop_rmdir_t	nfs_rmdir;
    140 static vop_symlink_t	nfs_symlink;
    141 static vop_readdir_t	nfs_readdir;
    142 static vop_strategy_t	nfs_strategy;
    143 static	int	nfs_lookitup(struct vnode *, char *, int,
    144 		    struct ucred *, struct thread *, struct nfsnode **);
    145 static	int	nfs_sillyrename(struct vnode *, struct vnode *,
    146 		    struct componentname *);
    147 static vop_access_t	nfsspec_access;
    148 static vop_readlink_t	nfs_readlink;
    149 static vop_print_t	nfs_print;
    150 static vop_advlock_t	nfs_advlock;
    151 static vop_advlockasync_t nfs_advlockasync;
    152 static vop_getacl_t nfs_getacl;
    153 static vop_setacl_t nfs_setacl;
    154 
    155 /*
    156  * Global vfs data structures for nfs
    157  */
    158 struct vop_vector newnfs_vnodeops = {
    159 	.vop_default =		&default_vnodeops,
    160 	.vop_access =		nfs_access,
    161 	.vop_advlock =		nfs_advlock,
    162 	.vop_advlockasync =	nfs_advlockasync,
    163 	.vop_close =		nfs_close,
    164 	.vop_create =		nfs_create,
    165 	.vop_fsync =		nfs_fsync,
    166 	.vop_getattr =		nfs_getattr,
    167 	.vop_getpages =		ncl_getpages,
    168 	.vop_putpages =		ncl_putpages,
    169 	.vop_inactive =		ncl_inactive,
    170 	.vop_link =		nfs_link,
    171 	.vop_lookup =		nfs_lookup,
    172 	.vop_mkdir =		nfs_mkdir,
    173 	.vop_mknod =		nfs_mknod,
    174 	.vop_open =		nfs_open,
    175 	.vop_pathconf =		nfs_pathconf,
    176 	.vop_print =		nfs_print,
    177 	.vop_read =		nfs_read,
    178 	.vop_readdir =		nfs_readdir,
    179 	.vop_readlink =		nfs_readlink,
    180 	.vop_reclaim =		ncl_reclaim,
    181 	.vop_remove =		nfs_remove,
    182 	.vop_rename =		nfs_rename,
    183 	.vop_rmdir =		nfs_rmdir,
    184 	.vop_setattr =		nfs_setattr,
    185 	.vop_strategy =		nfs_strategy,
    186 	.vop_symlink =		nfs_symlink,
    187 	.vop_write =		ncl_write,
    188 	.vop_getacl =		nfs_getacl,
    189 	.vop_setacl =		nfs_setacl,
    190 };
    191 
    192 struct vop_vector newnfs_fifoops = {
    193 	.vop_default =		&fifo_specops,
    194 	.vop_access =		nfsspec_access,
    195 	.vop_close =		nfsfifo_close,
    196 	.vop_fsync =		nfs_fsync,
    197 	.vop_getattr =		nfs_getattr,
    198 	.vop_inactive =		ncl_inactive,
    199 	.vop_print =		nfs_print,
    200 	.vop_read =		nfsfifo_read,
    201 	.vop_reclaim =		ncl_reclaim,
    202 	.vop_setattr =		nfs_setattr,
    203 	.vop_write =		nfsfifo_write,
    204 };
    205 
    206 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
    207     struct componentname *cnp, struct vattr *vap);
    208 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    209     int namelen, struct ucred *cred, struct thread *td);
    210 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
    211     char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
    212     char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
    213 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
    214     struct componentname *scnp, struct sillyrename *sp);
    215 
    216 /*
    217  * Global variables
    218  */
    219 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
    220 
/* Tunables registered under the _vfs_nfs sysctl node declared here. */
SYSCTL_DECL(_vfs_nfs);

/*
 * How long a per-node ACCESS cache entry stays usable: nfs_access() accepts
 * an entry while time_second is less than the entry's stamp plus this value.
 * When it is greater than zero nfs_access() also asks the server about every
 * access bit at once, so that the single reply can be cached.
 */
static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

/*
 * Tested by nfs_getattr() for NFSv3/v4 vnodes once the attribute cache has
 * missed.  NOTE(review): what is done when it is set lies outside this part
 * of the file - confirm there.
 */
static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
	   &nfs_prime_access_cache, 0,
	   "Prime NFS ACCESS cache when fetching attributes");

/*
 * When non-zero, nfs_close() passes a commit request to ncl_flush() when it
 * flushes a modified NFSv3 or NFSv4 file; otherwise only the write is
 * requested.
 */
static int	newnfs_commit_on_close = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
    &newnfs_commit_on_close, 0, "write+commit on close, else only write");

/*
 * When non-zero, nfs_close() calls vm_object_page_clean() on a regular
 * file's VM object (if it has one) before looking at NMODIFIED.
 */
static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

/*
 * When non-zero, nfs_open() and nfs_close() count O_DIRECT opens of regular
 * files in n_directio_opens and keep NNONCACHE set on the node while that
 * count is above zero.
 */
int newnfs_directio_enable = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
	   &newnfs_directio_enable, 0, "Enable NFS directio");

/*
 * Not referenced in this part of the file.  NOTE(review): per the sysctl
 * description it selects retrying a pageout that returned an error -
 * confirm against the code that reads it.
 */
int nfs_keep_dirty_on_error;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW,
    &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
 * this by default to prevent DoS attacks - to prevent a malicious user from
 * opening up files O_DIRECT preventing other users from mmap'ing these
 * files.  "Protected" environments where stricter consistency guarantees are
 * required can disable this knob.  The process that opened the file O_DIRECT
 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
 * meaningful.
 */
int newnfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
	   &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
    262 
    263 #define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY		\
    264 			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
    265 			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)
    266 
    267 /*
    268  * SMP Locking Note :
    269  * The list of locks after the description of the lock is the ordering
    270  * of other locks acquired with the lock held.
    271  * np->n_mtx : Protects the fields in the nfsnode.
    272        VM Object Lock
    273        VI_MTX (acquired indirectly)
    274  * nmp->nm_mtx : Protects the fields in the nfsmount.
    275        rep->r_mtx
    276  * ncl_iod_mutex : Global lock, protects shared nfsiod state.
    277  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
    278        nmp->nm_mtx
    279        rep->r_mtx
    280  * rep->r_mtx : Protects the fields in an nfsreq.
    281  */
    282 
    283 static int
    284 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td,
    285     struct ucred *cred, u_int32_t *retmode)
    286 {
    287 	int error = 0, attrflag, i, lrupos;
    288 	u_int32_t rmode;
    289 	struct nfsnode *np = VTONFS(vp);
    290 	struct nfsvattr nfsva;
    291 
    292 	error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag,
    293 	    &rmode, NULL);
    294 	if (attrflag)
    295 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
    296 	if (!error) {
    297 		lrupos = 0;
    298 		mtx_lock(&np->n_mtx);
    299 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
    300 			if (np->n_accesscache[i].uid == cred->cr_uid) {
    301 				np->n_accesscache[i].mode = rmode;
    302 				np->n_accesscache[i].stamp = time_second;
    303 				break;
    304 			}
    305 			if (i > 0 && np->n_accesscache[i].stamp <
    306 			    np->n_accesscache[lrupos].stamp)
    307 				lrupos = i;
    308 		}
    309 		if (i == NFS_ACCESSCACHESIZE) {
    310 			np->n_accesscache[lrupos].uid = cred->cr_uid;
    311 			np->n_accesscache[lrupos].mode = rmode;
    312 			np->n_accesscache[lrupos].stamp = time_second;
    313 		}
    314 		mtx_unlock(&np->n_mtx);
    315 		if (retmode != NULL)
    316 			*retmode = rmode;
    317 		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
    318 	} else if (NFS_ISV4(vp)) {
    319 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
    320 	}
    321 #ifdef KDTRACE_HOOKS
    322 	if (error != 0)
    323 		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
    324 		    error);
    325 #endif
    326 	return (error);
    327 }
    328 
    329 /*
    330  * nfs access vnode op.
    331  * For nfs version 2, just return ok. File accesses may fail later.
    332  * For nfs version 3, use the access rpc to check accessibility. If file modes
    333  * are changed on the server, accesses might still fail later.
    334  */
    335 static int
    336 nfs_access(struct vop_access_args *ap)
    337 {
    338 	struct vnode *vp = ap->a_vp;
    339 	int error = 0, i, gotahit;
    340 	u_int32_t mode, wmode, rmode;
    341 	int v34 = NFS_ISV34(vp);
    342 	struct nfsnode *np = VTONFS(vp);
    343 
    344 	/*
    345 	 * Disallow write attempts on filesystems mounted read-only;
    346 	 * unless the file is a socket, fifo, or a block or character
    347 	 * device resident on the filesystem.
    348 	 */
    349 	if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS |
    350 	    VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL |
    351 	    VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
    352 		switch (vp->v_type) {
    353 		case VREG:
    354 		case VDIR:
    355 		case VLNK:
    356 			return (EROFS);
    357 		default:
    358 			break;
    359 		}
    360 	}
    361 	/*
    362 	 * For nfs v3 or v4, check to see if we have done this recently, and if
    363 	 * so return our cached result instead of making an ACCESS call.
    364 	 * If not, do an access rpc, otherwise you are stuck emulating
    365 	 * ufs_access() locally using the vattr. This may not be correct,
    366 	 * since the server may apply other access criteria such as
    367 	 * client uid-->server uid mapping that we do not know about.
    368 	 */
    369 	if (v34) {
    370 		if (ap->a_accmode & VREAD)
    371 			mode = NFSACCESS_READ;
    372 		else
    373 			mode = 0;
    374 		if (vp->v_type != VDIR) {
    375 			if (ap->a_accmode & VWRITE)
    376 				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
    377 			if (ap->a_accmode & VAPPEND)
    378 				mode |= NFSACCESS_EXTEND;
    379 			if (ap->a_accmode & VEXEC)
    380 				mode |= NFSACCESS_EXECUTE;
    381 			if (ap->a_accmode & VDELETE)
    382 				mode |= NFSACCESS_DELETE;
    383 		} else {
    384 			if (ap->a_accmode & VWRITE)
    385 				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
    386 			if (ap->a_accmode & VAPPEND)
    387 				mode |= NFSACCESS_EXTEND;
    388 			if (ap->a_accmode & VEXEC)
    389 				mode |= NFSACCESS_LOOKUP;
    390 			if (ap->a_accmode & VDELETE)
    391 				mode |= NFSACCESS_DELETE;
    392 			if (ap->a_accmode & VDELETE_CHILD)
    393 				mode |= NFSACCESS_MODIFY;
    394 		}
    395 		/* XXX safety belt, only make blanket request if caching */
    396 		if (nfsaccess_cache_timeout > 0) {
    397 			wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
    398 				NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
    399 				NFSACCESS_DELETE | NFSACCESS_LOOKUP;
    400 		} else {
    401 			wmode = mode;
    402 		}
    403 
    404 		/*
    405 		 * Does our cached result allow us to give a definite yes to
    406 		 * this request?
    407 		 */
    408 		gotahit = 0;
    409 		mtx_lock(&np->n_mtx);
    410 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
    411 			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
    412 			    if (time_second < (np->n_accesscache[i].stamp
    413 				+ nfsaccess_cache_timeout) &&
    414 				(np->n_accesscache[i].mode & mode) == mode) {
    415 				NFSINCRGLOBAL(nfsstatsv1.accesscache_hits);
    416 				gotahit = 1;
    417 			    }
    418 			    break;
    419 			}
    420 		}
    421 		mtx_unlock(&np->n_mtx);
    422 #ifdef KDTRACE_HOOKS
    423 		if (gotahit != 0)
    424 			KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
    425 			    ap->a_cred->cr_uid, mode);
    426 		else
    427 			KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
    428 			    ap->a_cred->cr_uid, mode);
    429 #endif
    430 		if (gotahit == 0) {
    431 			/*
    432 			 * Either a no, or a don't know.  Go to the wire.
    433 			 */
    434 			NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
    435 		        error = nfs34_access_otw(vp, wmode, ap->a_td,
    436 			    ap->a_cred, &rmode);
    437 			if (!error &&
    438 			    (rmode & mode) != mode)
    439 				error = EACCES;
    440 		}
    441 		return (error);
    442 	} else {
    443 		if ((error = nfsspec_access(ap)) != 0) {
    444 			return (error);
    445 		}
    446 		/*
    447 		 * Attempt to prevent a mapped root from accessing a file
    448 		 * which it shouldn't.  We try to read a byte from the file
    449 		 * if the user is root and the file is not zero length.
    450 		 * After calling nfsspec_access, we should have the correct
    451 		 * file size cached.
    452 		 */
    453 		mtx_lock(&np->n_mtx);
    454 		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
    455 		    && VTONFS(vp)->n_size > 0) {
    456 			struct iovec aiov;
    457 			struct uio auio;
    458 			char buf[1];
    459 
    460 			mtx_unlock(&np->n_mtx);
    461 			aiov.iov_base = buf;
    462 			aiov.iov_len = 1;
    463 			auio.uio_iov = &aiov;
    464 			auio.uio_iovcnt = 1;
    465 			auio.uio_offset = 0;
    466 			auio.uio_resid = 1;
    467 			auio.uio_segflg = UIO_SYSSPACE;
    468 			auio.uio_rw = UIO_READ;
    469 			auio.uio_td = ap->a_td;
    470 
    471 			if (vp->v_type == VREG)
    472 				error = ncl_readrpc(vp, &auio, ap->a_cred);
    473 			else if (vp->v_type == VDIR) {
    474 				char* bp;
    475 				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
    476 				aiov.iov_base = bp;
    477 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
    478 				error = ncl_readdirrpc(vp, &auio, ap->a_cred,
    479 				    ap->a_td);
    480 				free(bp, M_TEMP);
    481 			} else if (vp->v_type == VLNK)
    482 				error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
    483 			else
    484 				error = EACCES;
    485 		} else
    486 			mtx_unlock(&np->n_mtx);
    487 		return (error);
    488 	}
    489 }
    490 
    491 
    492 /*
    493  * nfs open vnode op
    494  * Check to see if the type is ok
    495  * and that deletion is not in progress.
    496  * For paged in text files, you will need to flush the page cache
    497  * if consistency is lost.
    498  */
    499 /* ARGSUSED */
    500 static int
    501 nfs_open(struct vop_open_args *ap)
    502 {
    503 	struct vnode *vp = ap->a_vp;
    504 	struct nfsnode *np = VTONFS(vp);
    505 	struct vattr vattr;
    506 	int error;
    507 	int fmode = ap->a_mode;
    508 	struct ucred *cred;
    509 
    510 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
    511 		return (EOPNOTSUPP);
    512 
    513 	/*
    514 	 * For NFSv4, we need to do the Open Op before cache validation,
    515 	 * so that we conform to RFC3530 Sec. 9.3.1.
    516 	 */
    517 	if (NFS_ISV4(vp)) {
    518 		error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
    519 		if (error) {
    520 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
    521 			    (gid_t)0);
    522 			return (error);
    523 		}
    524 	}
    525 
    526 	/*
    527 	 * Now, if this Open will be doing reading, re-validate/flush the
    528 	 * cache, so that Close/Open coherency is maintained.
    529 	 */
    530 	mtx_lock(&np->n_mtx);
    531 	if (np->n_flag & NMODIFIED) {
    532 		mtx_unlock(&np->n_mtx);
    533 		error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
    534 		if (error == EINTR || error == EIO) {
    535 			if (NFS_ISV4(vp))
    536 				(void) nfsrpc_close(vp, 0, ap->a_td);
    537 			return (error);
    538 		}
    539 		mtx_lock(&np->n_mtx);
    540 		np->n_attrstamp = 0;
    541 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
    542 		if (vp->v_type == VDIR)
    543 			np->n_direofoffset = 0;
    544 		mtx_unlock(&np->n_mtx);
    545 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
    546 		if (error) {
    547 			if (NFS_ISV4(vp))
    548 				(void) nfsrpc_close(vp, 0, ap->a_td);
    549 			return (error);
    550 		}
    551 		mtx_lock(&np->n_mtx);
    552 		np->n_mtime = vattr.va_mtime;
    553 		if (NFS_ISV4(vp))
    554 			np->n_change = vattr.va_filerev;
    555 	} else {
    556 		mtx_unlock(&np->n_mtx);
    557 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
    558 		if (error) {
    559 			if (NFS_ISV4(vp))
    560 				(void) nfsrpc_close(vp, 0, ap->a_td);
    561 			return (error);
    562 		}
    563 		mtx_lock(&np->n_mtx);
    564 		if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
    565 		    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
    566 			if (vp->v_type == VDIR)
    567 				np->n_direofoffset = 0;
    568 			mtx_unlock(&np->n_mtx);
    569 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
    570 			if (error == EINTR || error == EIO) {
    571 				if (NFS_ISV4(vp))
    572 					(void) nfsrpc_close(vp, 0, ap->a_td);
    573 				return (error);
    574 			}
    575 			mtx_lock(&np->n_mtx);
    576 			np->n_mtime = vattr.va_mtime;
    577 			if (NFS_ISV4(vp))
    578 				np->n_change = vattr.va_filerev;
    579 		}
    580 	}
    581 
    582 	/*
    583 	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
    584 	 */
    585 	if (newnfs_directio_enable && (fmode & O_DIRECT) &&
    586 	    (vp->v_type == VREG)) {
    587 		if (np->n_directio_opens == 0) {
    588 			mtx_unlock(&np->n_mtx);
    589 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
    590 			if (error) {
    591 				if (NFS_ISV4(vp))
    592 					(void) nfsrpc_close(vp, 0, ap->a_td);
    593 				return (error);
    594 			}
    595 			mtx_lock(&np->n_mtx);
    596 			np->n_flag |= NNONCACHE;
    597 		}
    598 		np->n_directio_opens++;
    599 	}
    600 
    601 	/* If opened for writing via NFSv4.1 or later, mark that for pNFS. */
    602 	if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0)
    603 		np->n_flag |= NWRITEOPENED;
    604 
    605 	/*
    606 	 * If this is an open for writing, capture a reference to the
    607 	 * credentials, so they can be used by ncl_putpages(). Using
    608 	 * these write credentials is preferable to the credentials of
    609 	 * whatever thread happens to be doing the VOP_PUTPAGES() since
    610 	 * the write RPCs are less likely to fail with EACCES.
    611 	 */
    612 	if ((fmode & FWRITE) != 0) {
    613 		cred = np->n_writecred;
    614 		np->n_writecred = crhold(ap->a_cred);
    615 	} else
    616 		cred = NULL;
    617 	mtx_unlock(&np->n_mtx);
    618 
    619 	if (cred != NULL)
    620 		crfree(cred);
    621 	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
    622 	return (0);
    623 }
    624 
    625 /*
    626  * nfs close vnode op
    627  * What an NFS client should do upon close after writing is a debatable issue.
    628  * Most NFS clients push delayed writes to the server upon close, basically for
    629  * two reasons:
    630  * 1 - So that any write errors may be reported back to the client process
    631  *     doing the close system call. By far the two most likely errors are
    632  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
    633  * 2 - To put a worst case upper bound on cache inconsistency between
    634  *     multiple clients for the file.
    635  * There is also a consistency problem for Version 2 of the protocol w.r.t.
    636  * not being able to tell if other clients are writing a file concurrently,
    637  * since there is no way of knowing if the changed modify time in the reply
    638  * is only due to the write for this client.
    639  * (NFS Version 3 provides weak cache consistency data in the reply that
    640  *  should be sufficient to detect and handle this case.)
    641  *
    642  * The current code does the following:
    643  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
    644  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
    645  *                     or commit them (this satisfies 1 and 2 except for the
    646  *                     case where the server crashes after this close but
    647  *                     before the commit RPC, which is felt to be "good
    648  *                     enough". Changing the last argument to ncl_flush() to
    649  *                     a 1 would force a commit operation, if it is felt a
    650  *                     commit is necessary now.
    651  * for NFS Version 4 - flush the dirty buffers and commit them, if
    652  *		       nfscl_mustflush() says this is necessary.
    653  *                     It is necessary if there is no write delegation held,
    654  *                     in order to satisfy open/close coherency.
    655  *                     If the file isn't cached on local stable storage,
    656  *                     it may be necessary in order to detect "out of space"
    657  *                     errors from the server, if the write delegation
    658  *                     issued by the server doesn't allow the file to grow.
    659  */
    660 /* ARGSUSED */
    661 static int
    662 nfs_close(struct vop_close_args *ap)
    663 {
    664 	struct vnode *vp = ap->a_vp;
    665 	struct nfsnode *np = VTONFS(vp);
    666 	struct nfsvattr nfsva;
    667 	struct ucred *cred;
    668 	int error = 0, ret, localcred = 0;
    669 	int fmode = ap->a_fflag;
    670 
    671 	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF))
    672 		return (0);
    673 	/*
    674 	 * During shutdown, a_cred isn't valid, so just use root.
    675 	 */
    676 	if (ap->a_cred == NOCRED) {
    677 		cred = newnfs_getcred();
    678 		localcred = 1;
    679 	} else {
    680 		cred = ap->a_cred;
    681 	}
    682 	if (vp->v_type == VREG) {
    683 	    /*
    684 	     * Examine and clean dirty pages, regardless of NMODIFIED.
    685 	     * This closes a major hole in close-to-open consistency.
    686 	     * We want to push out all dirty pages (and buffers) on
    687 	     * close, regardless of whether they were dirtied by
    688 	     * mmap'ed writes or via write().
    689 	     */
    690 	    if (nfs_clean_pages_on_close && vp->v_object) {
    691 		VM_OBJECT_WLOCK(vp->v_object);
    692 		vm_object_page_clean(vp->v_object, 0, 0, 0);
    693 		VM_OBJECT_WUNLOCK(vp->v_object);
    694 	    }
    695 	    mtx_lock(&np->n_mtx);
    696 	    if (np->n_flag & NMODIFIED) {
    697 		mtx_unlock(&np->n_mtx);
    698 		if (NFS_ISV3(vp)) {
    699 		    /*
    700 		     * Under NFSv3 we have dirty buffers to dispose of.  We
    701 		     * must flush them to the NFS server.  We have the option
    702 		     * of waiting all the way through the commit rpc or just
    703 		     * waiting for the initial write.  The default is to only
    704 		     * wait through the initial write so the data is in the
    705 		     * server's cache, which is roughly similar to the state
    706 		     * a standard disk subsystem leaves the file in on close().
    707 		     *
    708 		     * We cannot clear the NMODIFIED bit in np->n_flag due to
    709 		     * potential races with other processes, and certainly
    710 		     * cannot clear it if we don't commit.
    711 		     * These races occur when there is no longer the old
    712 		     * traditional vnode locking implemented for Vnode Ops.
    713 		     */
    714 		    int cm = newnfs_commit_on_close ? 1 : 0;
    715 		    error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0);
    716 		    /* np->n_flag &= ~NMODIFIED; */
    717 		} else if (NFS_ISV4(vp)) {
    718 			if (nfscl_mustflush(vp) != 0) {
    719 				int cm = newnfs_commit_on_close ? 1 : 0;
    720 				error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td,
    721 				    cm, 0);
    722 				/*
    723 				 * as above w.r.t races when clearing
    724 				 * NMODIFIED.
    725 				 * np->n_flag &= ~NMODIFIED;
    726 				 */
    727 			}
    728 		} else
    729 		    error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
    730 		mtx_lock(&np->n_mtx);
    731 	    }
    732  	    /*
    733  	     * Invalidate the attribute cache in all cases.
    734  	     * An open is going to fetch fresh attrs any way, other procs
    735  	     * on this node that have file open will be forced to do an
    736  	     * otw attr fetch, but this is safe.
    737 	     * --> A user found that their RPC count dropped by 20% when
    738 	     *     this was commented out and I can't see any requirement
    739 	     *     for it, so I've disabled it when negative lookups are
    740 	     *     enabled. (What does this have to do with negative lookup
    741 	     *     caching? Well nothing, except it was reported by the
    742 	     *     same user that needed negative lookup caching and I wanted
    743 	     *     there to be a way to disable it to see if it
    744 	     *     is the cause of some caching/coherency issue that might
    745 	     *     crop up.)
    746  	     */
    747 	    if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
    748 		    np->n_attrstamp = 0;
    749 		    KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
    750 	    }
    751 	    if (np->n_flag & NWRITEERR) {
    752 		np->n_flag &= ~NWRITEERR;
    753 		error = np->n_error;
    754 	    }
    755 	    mtx_unlock(&np->n_mtx);
    756 	}
    757 
    758 	if (NFS_ISV4(vp)) {
    759 		/*
    760 		 * Get attributes so "change" is up to date.
    761 		 */
    762 		if (error == 0 && nfscl_mustflush(vp) != 0 &&
    763 		    vp->v_type == VREG &&
    764 		    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
    765 			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
    766 			    NULL);
    767 			if (!ret) {
    768 				np->n_change = nfsva.na_filerev;
    769 				(void) nfscl_loadattrcache(&vp, &nfsva, NULL,
    770 				    NULL, 0, 0);
    771 			}
    772 		}
    773 
    774 		/*
    775 		 * and do the close.
    776 		 */
    777 		ret = nfsrpc_close(vp, 0, ap->a_td);
    778 		if (!error && ret)
    779 			error = ret;
    780 		if (error)
    781 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
    782 			    (gid_t)0);
    783 	}
    784 	if (newnfs_directio_enable)
    785 		KASSERT((np->n_directio_asyncwr == 0),
    786 			("nfs_close: dirty unflushed (%d) directio buffers\n",
    787 			 np->n_directio_asyncwr));
    788 	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
    789 		mtx_lock(&np->n_mtx);
    790 		KASSERT((np->n_directio_opens > 0),
    791 			("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
    792 		np->n_directio_opens--;
    793 		if (np->n_directio_opens == 0)
    794 			np->n_flag &= ~NNONCACHE;
    795 		mtx_unlock(&np->n_mtx);
    796 	}
    797 	if (localcred)
    798 		NFSFREECRED(cred);
    799 	return (error);
    800 }
    801 
    802 /*
    803  * nfs getattr call from vfs.
         *
         * Fills in *ap->a_vap for ap->a_vp.  The attribute cache is consulted
         * first.  If that fails and access-cache priming applies (NFS_ISV34(vp),
         * nfs_prime_access_cache set and nfsaccess_cache_timeout > 0),
         * nfs34_access_otw() is called and the cache is consulted once more.
         * If there is still no cached copy, a getattr RPC is done and its
         * result is loaded through nfscl_loadattrcache().
         *
         * Returns 0 on success, else an error number; on an NFSv4 vnode a
         * failure of the RPC path is first translated by nfscl_maperr().
    804  */
    805 static int
    806 nfs_getattr(struct vop_getattr_args *ap)
    807 {
    808 	struct vnode *vp = ap->a_vp;
    809 	struct thread *td = curthread;	/* XXX */
    810 	struct nfsnode *np = VTONFS(vp);
    811 	int error = 0;
    812 	struct nfsvattr nfsva;
    813 	struct vattr *vap = ap->a_vap;
    814 	struct vattr vattr;
    815 
    816 	/*
    817 	 * Update local times for special files.
    818 	 */
    819 	mtx_lock(&np->n_mtx);
    820 	if (np->n_flag & (NACC | NUPD))
    821 		np->n_flag |= NCHG;
    822 	mtx_unlock(&np->n_mtx);
    823 	/*
    824 	 * First look in the cache.
    825 	 */
    826 	if (ncl_getattrcache(vp, &vattr) == 0) {
        		/*
        		 * The cached copy lands in a local vattr and only the
        		 * fields listed below are copied out to the caller.
        		 */
    827 		vap->va_type = vattr.va_type;
    828 		vap->va_mode = vattr.va_mode;
    829 		vap->va_nlink = vattr.va_nlink;
    830 		vap->va_uid = vattr.va_uid;
    831 		vap->va_gid = vattr.va_gid;
    832 		vap->va_fsid = vattr.va_fsid;
    833 		vap->va_fileid = vattr.va_fileid;
    834 		vap->va_size = vattr.va_size;
    835 		vap->va_blocksize = vattr.va_blocksize;
    836 		vap->va_atime = vattr.va_atime;
    837 		vap->va_mtime = vattr.va_mtime;
    838 		vap->va_ctime = vattr.va_ctime;
    839 		vap->va_gen = vattr.va_gen;
    840 		vap->va_flags = vattr.va_flags;
    841 		vap->va_rdev = vattr.va_rdev;
    842 		vap->va_bytes = vattr.va_bytes;
    843 		vap->va_filerev = vattr.va_filerev;
    844 		/*
    845 		 * Get the local modify time for the case of a write
    846 		 * delegation.
    847 		 */
    848 		nfscl_deleggetmodtime(vp, &vap->va_mtime);
    849 		return (0);
    850 	}
    851 
        	/*
        	 * Cache miss.  The return value of nfs34_access_otw() is ignored;
        	 * only the second cache lookup decides whether the call helped
        	 * (presumably the access reply refreshes the attribute cache --
        	 * confirm in nfs34_access_otw()).
        	 */
    852 	if (NFS_ISV34(vp) && nfs_prime_access_cache &&
    853 	    nfsaccess_cache_timeout > 0) {
    854 		NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
    855 		nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
    856 		if (ncl_getattrcache(vp, ap->a_vap) == 0) {
    857 			nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
    858 			return (0);
    859 		}
    860 	}
        	/* Last resort: fetch the attributes with a getattr RPC. */
    861 	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
    862 	if (!error)
    863 		error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
    864 	if (!error) {
    865 		/*
    866 		 * Get the local modify time for the case of a write
    867 		 * delegation.
    868 		 */
    869 		nfscl_deleggetmodtime(vp, &vap->va_mtime);
    870 	} else if (NFS_ISV4(vp)) {
    871 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
    872 	}
    873 	return (error);
    874 }
    875 
    876 /*
    877  * nfs setattr call.
         *
         * Validates the request in ap->a_vap, applies a size change locally,
         * flushes or invalidates buffers where needed and then issues the
         * setattr RPC through nfs_setattrrpc().
         *
         * Early returns visible in this function:
         *   EOPNOTSUPP  va_flags is set.
         *   EROFS       uid/gid/atime/mtime/mode change on a read-only mount,
         *               or a size change that reaches the default case of the
         *               switch on a read-only mount.
         *   EISDIR      size change on a directory.
         *   0           size-only change on VCHR/VBLK/VSOCK/VFIFO (nothing
         *               is done).
         * If the RPC fails for a size change, n_size, the cached size and the
         * pager size are put back to the saved value (tsize).
    878  */
    879 static int
    880 nfs_setattr(struct vop_setattr_args *ap)
    881 {
    882 	struct vnode *vp = ap->a_vp;
    883 	struct nfsnode *np = VTONFS(vp);
    884 	struct thread *td = curthread;	/* XXX */
    885 	struct vattr *vap = ap->a_vap;
    886 	int error = 0;
    887 	u_quad_t tsize;
    888 
    889 #ifndef nolint
    890 	tsize = (u_quad_t)0;
    891 #endif
    892 
    893 	/*
    894 	 * Setting of flags and marking of atimes are not supported.
    895 	 */
    896 	if (vap->va_flags != VNOVAL)
    897 		return (EOPNOTSUPP);
    898 
    899 	/*
    900 	 * Disallow write attempts if the filesystem is mounted read-only.
        	 * (The va_flags term below can no longer be true because of the
        	 * return just above; it is redundant.)
    901 	 */
    902   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
    903 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
    904 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
    905 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
    906 		return (EROFS);
    907 	if (vap->va_size != VNOVAL) {
    908  		switch (vp->v_type) {
    909  		case VDIR:
    910  			return (EISDIR);
    911  		case VCHR:
    912  		case VBLK:
    913  		case VSOCK:
    914  		case VFIFO:
        			/*
        			 * Size cannot be set on these types: a size-only
        			 * request succeeds as a no-op, otherwise the size
        			 * is dropped and the other attributes are sent.
        			 */
    915 			if (vap->va_mtime.tv_sec == VNOVAL &&
    916 			    vap->va_atime.tv_sec == VNOVAL &&
    917 			    vap->va_mode == (mode_t)VNOVAL &&
    918 			    vap->va_uid == (uid_t)VNOVAL &&
    919 			    vap->va_gid == (gid_t)VNOVAL)
    920 				return (0);
    921  			vap->va_size = VNOVAL;
    922  			break;
    923  		default:
    924 			/*
    925 			 * Disallow write attempts if the filesystem is
    926 			 * mounted read-only.
    927 			 */
    928 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
    929 				return (EROFS);
    930 			/*
    931 			 *  We run vnode_pager_setsize() early (why?),
    932 			 * we must set np->n_size now to avoid vinvalbuf
    933 			 * V_SAVE races that might setsize a lower
    934 			 * value.
    935 			 */
    936 			mtx_lock(&np->n_mtx);
    937 			tsize = np->n_size;
    938 			mtx_unlock(&np->n_mtx);
        			/*
        			 * NOTE(review): the value stored in error here is
        			 * never examined; it is overwritten by
        			 * ncl_vinvalbuf() or nfs_setattrrpc() below.
        			 */
    939 			error = ncl_meta_setsize(vp, ap->a_cred, td,
    940 			    vap->va_size);
    941 			mtx_lock(&np->n_mtx);
    942  			if (np->n_flag & NMODIFIED) {
        			    /*
        			     * tsize is re-read after ncl_meta_setsize(),
        			     * replacing the value saved before it.
        			     */
    943 			    tsize = np->n_size;
    944 			    mtx_unlock(&np->n_mtx);
    945  			    if (vap->va_size == 0)
    946  				error = ncl_vinvalbuf(vp, 0, td, 1);
    947  			    else
    948  				error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
    949  			    if (error) {
    950 				vnode_pager_setsize(vp, tsize);
    951 				return (error);
    952 			    }
    953 			    /*
    954 			     * Call nfscl_delegmodtime() to set the modify time
    955 			     * locally, as required.
    956 			     */
    957 			    nfscl_delegmodtime(vp);
    958  			} else
    959 			    mtx_unlock(&np->n_mtx);
    960 			/*
    961 			 * np->n_size has already been set to vap->va_size
    962 			 * in ncl_meta_setsize(). We must set it again since
    963 			 * nfs_loadattrcache() could be called through
    964 			 * ncl_meta_setsize() and could modify np->n_size.
    965 			 */
    966 			mtx_lock(&np->n_mtx);
    967  			np->n_vattr.na_size = np->n_size = vap->va_size;
    968 			mtx_unlock(&np->n_mtx);
    969   		}
    970   	} else {
        		/*
        		 * No size change.  When a time is being set on a locally
        		 * modified regular file, flush its buffers first.  Only
        		 * EINTR and EIO from the flush abort the call; any other
        		 * error is overwritten by the RPC result below.
        		 */
    971 		mtx_lock(&np->n_mtx);
    972 		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
    973 		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
    974 			mtx_unlock(&np->n_mtx);
    975 			if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
    976 			    (error == EINTR || error == EIO))
    977 				return (error);
    978 		} else
    979 			mtx_unlock(&np->n_mtx);
    980 	}
    981 	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
    982 	if (error && vap->va_size != VNOVAL) {
        		/*
        		 * The RPC failed for a size change: restore the saved size.
        		 * NOTE(review): vnode_pager_setsize() is called here while
        		 * n_mtx is held.
        		 */
    983 		mtx_lock(&np->n_mtx);
    984 		np->n_size = np->n_vattr.na_size = tsize;
    985 		vnode_pager_setsize(vp, tsize);
    986 		mtx_unlock(&np->n_mtx);
    987 	}
    988 	return (error);
    989 }
    990 
    991 /*
    992  * Do an nfs setattr rpc.
         *
         * When NFS_ISV34(vp) holds, every access cache entry of the node is
         * invalidated (stamp = 0) and NDELEGMOD is set before the RPC is sent.
         * Attributes returned with the reply (attrflag != 0) are loaded into
         * the attribute cache; an RPC error takes precedence over an error
         * from that load.  On an NFSv4 vnode a failure is translated by
         * nfscl_maperr() using the uid/gid from vap.
         *
         * Returns 0 on success, else an error number.
    993  */
    994 static int
    995 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    996     struct thread *td)
    997 {
    998 	struct nfsnode *np = VTONFS(vp);
    999 	int error, ret, attrflag, i;
   1000 	struct nfsvattr nfsva;
   1001 
   1002 	if (NFS_ISV34(vp)) {
   1003 		mtx_lock(&np->n_mtx);
   1004 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
   1005 			np->n_accesscache[i].stamp = 0;
   1006 		np->n_flag |= NDELEGMOD;
   1007 		mtx_unlock(&np->n_mtx);
   1008 		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
   1009 	}
   1010 	error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
   1011 	    NULL);
   1012 	if (attrflag) {
   1013 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
        		/* Keep the RPC error if there was one. */
   1014 		if (ret && !error)
   1015 			error = ret;
   1016 	}
   1017 	if (error && NFS_ISV4(vp))
   1018 		error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
   1019 	return (error);
   1020 }
   1021 
   1022 /*
   1023  * nfs lookup call, one step at a time...
   1024  * First look in cache
   1025  * If not found, unlock the directory nfsnode and do the rpc
         *
         * On success 0 is returned and *ap->a_vpp holds the resulting vnode.
         * Other returns visible in this function:
         *   EROFS        DELETE/RENAME of the last component, or CREATE/RENAME
         *                of a missing last component, on a read-only mount.
         *   ENOTDIR      dvp is not a directory.
         *   ENOENT       name not found (negative cache hit or RPC result),
         *                or dvp was doomed.
         *   EJUSTRETURN  missing last component for CREATE/RENAME.
         *   EISDIR       RENAME whose file handle equals the directory's.
         *   otherwise    the error from VOP_ACCESS(), cache_lookup(), the
         *                lookup RPC (mapped by nfscl_maperr() for NFSv4) or
         *                nfscl_nget().
   1026  */
   1027 static int
   1028 nfs_lookup(struct vop_lookup_args *ap)
   1029 {
   1030 	struct componentname *cnp = ap->a_cnp;
   1031 	struct vnode *dvp = ap->a_dvp;
   1032 	struct vnode **vpp = ap->a_vpp;
   1033 	struct mount *mp = dvp->v_mount;
   1034 	int flags = cnp->cn_flags;
   1035 	struct vnode *newvp;
   1036 	struct nfsmount *nmp;
   1037 	struct nfsnode *np, *newnp;
   1038 	int error = 0, attrflag, dattrflag, ltype, ncticks;
   1039 	struct thread *td = cnp->cn_thread;
   1040 	struct nfsfh *nfhp;
   1041 	struct nfsvattr dnfsva, nfsva;
   1042 	struct vattr vattr;
   1043 	struct timespec nctime;
   1044 
   1045 	*vpp = NULLVP;
   1046 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
   1047 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
   1048 		return (EROFS);
   1049 	if (dvp->v_type != VDIR)
   1050 		return (ENOTDIR);
   1051 	nmp = VFSTONFS(mp);
   1052 	np = VTONFS(dvp);
   1053 
   1054 	/* For NFSv4, wait until any remove is done. */
   1055 	mtx_lock(&np->n_mtx);
   1056 	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
   1057 		np->n_flag |= NREMOVEWANT;
   1058 		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
   1059 	}
   1060 	mtx_unlock(&np->n_mtx);
   1061 
   1062 	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
   1063 		return (error);
        	/*
        	 * cache_lookup() result as handled below: -1 is treated as a
        	 * positive hit (*vpp is used), ENOENT as a negative hit, any other
        	 * positive value is returned as an error, and anything else falls
        	 * through to the lookup RPC.
        	 */
   1064 	error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
   1065 	if (error > 0 && error != ENOENT)
   1066 		return (error);
   1067 	if (error == -1) {
   1068 		/*
   1069 		 * Lookups of "." are special and always return the
   1070 		 * current directory.  cache_lookup() already handles
   1071 		 * associated locking bookkeeping, etc.
   1072 		 */
   1073 		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
   1074 			/* XXX: Is this really correct? */
   1075 			if (cnp->cn_nameiop != LOOKUP &&
   1076 			    (flags & ISLASTCN))
   1077 				cnp->cn_flags |= SAVENAME;
   1078 			return (0);
   1079 		}
   1080 
   1081 		/*
   1082 		 * We only accept a positive hit in the cache if the
   1083 		 * change time of the file matches our cached copy.
   1084 		 * Otherwise, we discard the cache entry and fallback
   1085 		 * to doing a lookup RPC.  We also only trust cache
   1086 		 * entries for less than nm_nametimeo seconds.
   1087 		 *
   1088 		 * To better handle stale file handles and attributes,
   1089 		 * clear the attribute cache of this node if it is a
   1090 		 * leaf component, part of an open() call, and not
   1091 		 * locally modified before fetching the attributes.
   1092 		 * This should allow stale file handles to be detected
   1093 		 * here where we can fall back to a LOOKUP RPC to
   1094 		 * recover rather than having nfs_open() detect the
   1095 		 * stale file handle and failing open(2) with ESTALE.
        		 *
        		 * NOTE(review): newnp->n_flag is tested below before
        		 * n_mtx is taken.
   1096 		 */
   1097 		newvp = *vpp;
   1098 		newnp = VTONFS(newvp);
   1099 		if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
   1100 		    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
   1101 		    !(newnp->n_flag & NMODIFIED)) {
   1102 			mtx_lock(&newnp->n_mtx);
   1103 			newnp->n_attrstamp = 0;
   1104 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
   1105 			mtx_unlock(&newnp->n_mtx);
   1106 		}
   1107 		if (nfscl_nodeleg(newvp, 0) == 0 ||
   1108 		    ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
   1109 		    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
   1110 		    timespeccmp(&vattr.va_ctime, &nctime, ==))) {
   1111 			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
   1112 			if (cnp->cn_nameiop != LOOKUP &&
   1113 			    (flags & ISLASTCN))
   1114 				cnp->cn_flags |= SAVENAME;
   1115 			return (0);
   1116 		}
        		/* Cached entry rejected: drop it and the vnode reference. */
   1117 		cache_purge(newvp);
   1118 		if (dvp != newvp)
   1119 			vput(newvp);
   1120 		else
   1121 			vrele(newvp);
   1122 		*vpp = NULLVP;
   1123 	} else if (error == ENOENT) {
   1124 		if (dvp->v_iflag & VI_DOOMED)
   1125 			return (ENOENT);
   1126 		/*
   1127 		 * We only accept a negative hit in the cache if the
   1128 		 * modification time of the parent directory matches
   1129 		 * the cached copy in the name cache entry.
   1130 		 * Otherwise, we discard all of the negative cache
   1131 		 * entries for this directory.  We also only trust
   1132 		 * negative cache entries for up to nm_negnametimeo
   1133 		 * seconds.
   1134 		 */
   1135 		if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
   1136 		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
   1137 		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
   1138 			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
   1139 			return (ENOENT);
   1140 		}
   1141 		cache_purge_negative(dvp);
   1142 	}
   1143 
        	/*
        	 * No usable cache entry: do the lookup RPC.  The assignment of 0
        	 * to error is redundant, since the RPC result overwrites it.
        	 */
   1144 	error = 0;
   1145 	newvp = NULLVP;
   1146 	NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
   1147 	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
   1148 	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
   1149 	    NULL);
   1150 	if (dattrflag)
   1151 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
   1152 	if (error) {
        		/*
        		 * newvp was set to NULLVP just before the RPC and has not
        		 * been assigned since, so this branch cannot be taken.
        		 */
   1153 		if (newvp != NULLVP) {
   1154 			vput(newvp);
   1155 			*vpp = NULLVP;
   1156 		}
   1157 
   1158 		if (error != ENOENT) {
   1159 			if (NFS_ISV4(dvp))
   1160 				error = nfscl_maperr(td, error, (uid_t)0,
   1161 				    (gid_t)0);
   1162 			return (error);
   1163 		}
   1164 
   1165 		/* The requested file was not found. */
   1166 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
   1167 		    (flags & ISLASTCN)) {
   1168 			/*
   1169 			 * XXX: UFS does a full VOP_ACCESS(dvp,
   1170 			 * VWRITE) here instead of just checking
   1171 			 * MNT_RDONLY.
   1172 			 */
   1173 			if (mp->mnt_flag & MNT_RDONLY)
   1174 				return (EROFS);
   1175 			cnp->cn_flags |= SAVENAME;
   1176 			return (EJUSTRETURN);
   1177 		}
   1178 
   1179 		if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
   1180 			/*
   1181 			 * Cache the modification time of the parent
   1182 			 * directory from the post-op attributes in
   1183 			 * the name cache entry.  The negative cache
   1184 			 * entry will be ignored once the directory
   1185 			 * has changed.  Don't bother adding the entry
   1186 			 * if the directory has already changed.
   1187 			 */
   1188 			mtx_lock(&np->n_mtx);
   1189 			if (timespeccmp(&np->n_vattr.na_mtime,
   1190 			    &dnfsva.na_mtime, ==)) {
   1191 				mtx_unlock(&np->n_mtx);
   1192 				cache_enter_time(dvp, NULL, cnp,
   1193 				    &dnfsva.na_mtime, NULL);
   1194 			} else
   1195 				mtx_unlock(&np->n_mtx);
   1196 		}
   1197 		return (ENOENT);
   1198 	}
   1199 
   1200 	/*
   1201 	 * Handle RENAME case...
        	 * Note that nfscl_nget() stores its result in np, so from here
        	 * on np no longer refers to the directory's nfsnode.
   1202 	 */
   1203 	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
   1204 		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
   1205 			FREE((caddr_t)nfhp, M_NFSFH);
   1206 			return (EISDIR);
   1207 		}
   1208 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
   1209 		    LK_EXCLUSIVE);
   1210 		if (error)
   1211 			return (error);
   1212 		newvp = NFSTOV(np);
   1213 		if (attrflag)
   1214 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
   1215 			    0, 1);
   1216 		*vpp = newvp;
   1217 		cnp->cn_flags |= SAVENAME;
   1218 		return (0);
   1219 	}
   1220 
   1221 	if (flags & ISDOTDOT) {
        		/*
        		 * "..": dvp is unlocked around nfscl_nget().  The mount is
        		 * busied first (dropping the dvp lock if that would have to
        		 * wait), and VI_DOOMED is rechecked each time dvp has been
        		 * relocked.
        		 */
   1222 		ltype = NFSVOPISLOCKED(dvp);
   1223 		error = vfs_busy(mp, MBF_NOWAIT);
   1224 		if (error != 0) {
   1225 			vfs_ref(mp);
   1226 			NFSVOPUNLOCK(dvp, 0);
   1227 			error = vfs_busy(mp, 0);
   1228 			NFSVOPLOCK(dvp, ltype | LK_RETRY);
   1229 			vfs_rel(mp);
   1230 			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
   1231 				vfs_unbusy(mp);
   1232 				error = ENOENT;
   1233 			}
   1234 			if (error != 0)
   1235 				return (error);
   1236 		}
   1237 		NFSVOPUNLOCK(dvp, 0);
   1238 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
   1239 		    cnp->cn_lkflags);
   1240 		if (error == 0)
   1241 			newvp = NFSTOV(np);
   1242 		vfs_unbusy(mp);
   1243 		if (newvp != dvp)
   1244 			NFSVOPLOCK(dvp, ltype | LK_RETRY);
   1245 		if (dvp->v_iflag & VI_DOOMED) {
   1246 			if (error == 0) {
   1247 				if (newvp == dvp)
   1248 					vrele(newvp);
   1249 				else
   1250 					vput(newvp);
   1251 			}
   1252 			error = ENOENT;
   1253 		}
   1254 		if (error != 0)
   1255 			return (error);
   1256 		if (attrflag)
   1257 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
   1258 			    0, 1);
   1259 	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
        		/*
        		 * The returned file handle matches the directory's own:
        		 * hand back dvp with an extra reference.
        		 */
   1260 		FREE((caddr_t)nfhp, M_NFSFH);
   1261 		VREF(dvp);
   1262 		newvp = dvp;
   1263 		if (attrflag)
   1264 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
   1265 			    0, 1);
   1266 	} else {
   1267 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
   1268 		    cnp->cn_lkflags);
   1269 		if (error)
   1270 			return (error);
   1271 		newvp = NFSTOV(np);
   1272 		if (attrflag)
   1273 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
   1274 			    0, 1);
   1275 		else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
   1276 		    !(np->n_flag & NMODIFIED)) {
   1277 			/*
   1278 			 * Flush the attribute cache when opening a
   1279 			 * leaf node to ensure that fresh attributes
   1280 			 * are fetched in nfs_open() since we did not
   1281 			 * fetch attributes from the LOOKUP reply.
   1282 			 */
   1283 			mtx_lock(&np->n_mtx);
   1284 			np->n_attrstamp = 0;
   1285 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
   1286 			mtx_unlock(&np->n_mtx);
   1287 		}
   1288 	}
   1289 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
   1290 		cnp->cn_flags |= SAVENAME;
        	/*
        	 * Enter the name in the cache only when the reply carried file
        	 * attributes (and, for a directory, directory attributes as
        	 * well), and never for the last component of a DELETE.
        	 */
   1291 	if ((cnp->cn_flags & MAKEENTRY) &&
   1292 	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
   1293 	    attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
   1294 		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
   1295 		    newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime);
   1296 	*vpp = newvp;
   1297 	return (0);
   1298 }
   1299 
   1300 /*
   1301  * nfs read call.
   1302  * Just call ncl_bioread() to do the work.
         *
         * Only regular files are read; a directory yields EISDIR and every
         * other vnode type EOPNOTSUPP.
   1303  */
   1304 static int
   1305 nfs_read(struct vop_read_args *ap)
   1306 {
   1307 	struct vnode *vp = ap->a_vp;
   1308 
   1309 	switch (vp->v_type) {
   1310 	case VREG:
   1311 		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
   1312 	case VDIR:
   1313 		return (EISDIR);
   1314 	default:
   1315 		return (EOPNOTSUPP);
   1316 	}
   1317 }
   1318 
   1319 /*
   1320  * nfs readlink call
         *
         * Returns EINVAL unless the vnode is a symbolic link; otherwise the
         * link is read through ncl_bioread() with an ioflag of 0.
   1321  */
   1322 static int
   1323 nfs_readlink(struct vop_readlink_args *ap)
   1324 {
   1325 	struct vnode *vp = ap->a_vp;
   1326 
   1327 	if (vp->v_type != VLNK)
   1328 		return (EINVAL);
   1329 	return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
   1330 }
   1331 
   1332 /*
   1333  * Do a readlink rpc.
   1334  * Called by ncl_doio() from below the buffer cache.
         *
         * Attributes returned with the reply (attrflag != 0) are loaded into
         * the attribute cache; an RPC error takes precedence over an error
         * from that load.  On an NFSv4 vnode a failure is translated by
         * nfscl_maperr().  Returns 0 on success, else an error number.
   1335  */
   1336 int
   1337 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
   1338 {
   1339 	int error, ret, attrflag;
   1340 	struct nfsvattr nfsva;
   1341 
   1342 	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
   1343 	    &attrflag, NULL);
   1344 	if (attrflag) {
   1345 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
   1346 		if (ret && !error)
   1347 			error = ret;
   1348 	}
   1349 	if (error && NFS_ISV4(vp))
   1350 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
   1351 	return (error);
   1352 }
   1353 
   1354 /*
   1355  * nfs read rpc call
   1356  * Ditto above
         *
         * When NFSHASPNFS(nmp) holds, the read is first attempted through
         * nfscl_doiods().  If that is not tried or fails, the ordinary
         * nfsrpc_read() is used.  Returned attributes are loaded into the
         * attribute cache and NFSv4 errors are translated by nfscl_maperr().
         * Returns 0 on success, else an error number.
   1357  */
   1358 int
   1359 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
   1360 {
   1361 	int error, ret, attrflag;
   1362 	struct nfsvattr nfsva;
   1363 	struct nfsmount *nmp;
   1364 
   1365 	nmp = VFSTONFS(vnode_mount(vp));
        	/*
        	 * Start with a non-zero error so that the nfsrpc_read() call
        	 * below runs whenever nfscl_doiods() was skipped or failed.
        	 * attrflag is cleared because nfsrpc_read() may not be called.
        	 */
   1366 	error = EIO;
   1367 	attrflag = 0;
   1368 	if (NFSHASPNFS(nmp))
   1369 		error = nfscl_doiods(vp, uiop, NULL, NULL,
   1370 		    NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td);
   1371 	NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error);
   1372 	if (error != 0)
   1373 		error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva,
   1374 		    &attrflag, NULL);
   1375 	if (attrflag) {
   1376 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
   1377 		if (ret && !error)
   1378 			error = ret;
   1379 	}
   1380 	if (error && NFS_ISV4(vp))
   1381 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
   1382 	return (error);
   1383 }
   1384 
   1385 /*
   1386  * nfs write call
         *
         * When NFSHASPNFS(nmp) holds, the write is first attempted through
         * nfscl_doiods().  If that is not tried or fails, the ordinary
         * nfsrpc_write() is used.  Returned attributes are loaded into the
         * attribute cache and NFSv4 errors are translated by nfscl_maperr().
         * *iomode and *must_commit are passed through to the RPC layer;
         * *iomode is forced to NFSWRITE_FILESYNC when DOINGASYNC(vp) holds.
         * Returns 0 on success, else an error number.
   1387  */
   1388 int
   1389 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
   1390     int *iomode, int *must_commit, int called_from_strategy)
   1391 {
   1392 	struct nfsvattr nfsva;
   1393 	int error, attrflag, ret;
   1394 	struct nfsmount *nmp;
   1395 
   1396 	nmp = VFSTONFS(vnode_mount(vp));
        	/*
        	 * Start with a non-zero error so that the nfsrpc_write() call
        	 * below runs whenever nfscl_doiods() was skipped or failed.
        	 */
   1397 	error = EIO;
   1398 	attrflag = 0;
   1399 	if (NFSHASPNFS(nmp))
   1400 		error = nfscl_doiods(vp, uiop, iomode, must_commit,
   1401 		    NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td);
   1402 	NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error);
   1403 	if (error != 0)
   1404 		error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
   1405 		    uiop->uio_td, &nfsva, &attrflag, NULL,
   1406 		    called_from_strategy);
   1407 	if (attrflag) {
        		/*
        		 * The two calls differ only in the fifth argument, which
        		 * is 1 when the node has ND_NFSV4 set and 0 otherwise.
        		 */
   1408 		if (VTONFS(vp)->n_flag & ND_NFSV4)
   1409 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
   1410 			    1);
   1411 		else
   1412 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
   1413 			    1);
   1414 		if (ret && !error)
   1415 			error = ret;
   1416 	}
        	/*
        	 * NOTE(review): presumably this makes callers on an async mount
        	 * treat the data as already committed -- confirm against the
        	 * callers' handling of *iomode.
        	 */
   1417 	if (DOINGASYNC(vp))
   1418 		*iomode = NFSWRITE_FILESYNC;
   1419 	if (error && NFS_ISV4(vp))
   1420 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
   1421 	return (error);
   1422 }
   1423 
   1424 /*
   1425  * nfs mknod rpc
   1426  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
   1427  * mode set to specify the file type and the size field for rdev.
   1428  */
   1429 static int
   1430 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
   1431     struct vattr *vap)
   1432 {
   1433 	struct nfsvattr nfsva, dnfsva;
   1434 	struct vnode *newvp = NULL;
   1435 	struct nfsnode *np = NULL, *dnp;
   1436 	struct nfsfh *nfhp;
   1437 	struct vattr vattr;
   1438 	int error = 0, attrflag, dattrflag;
   1439 	u_int32_t rdev;
   1440 
   1441 	if (vap->va_type == VCHR || vap->va_type == VBLK)
   1442 		rdev = vap->va_rdev;
   1443 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
   1444 		rdev = 0xffffffff;
   1445 	else
   1446 		return (EOPNOTSUPP);
   1447 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
   1448 		return (error);
   1449 	error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
   1450 	    rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva,
   1451 	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
   1452 	if (!error) {
   1453 		if (!nfhp)
   1454 			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
   1455 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
   1456 			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
   1457 			    NULL);
   1458 		if (nfhp)
   1459 			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
   1460 			    cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
   1461 	}
   1462 	if (dattrflag)
   1463 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
   1464 	if (!error) {
   1465 		newvp = NFSTOV(np);
   1466 		if (attrflag != 0) {
   1467 			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
   1468 			    0, 1);
   1469 			if (error != 0)
   1470 				vput(newvp);
   1471 		}
   1472 	}
   1473 	if (!error) {
   1474 		*vpp = newvp;
   1475 	} else if (NFS_ISV4(dvp)) {
   1476 		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
   1477 		    vap->va_gid);
   1478 	}
   1479 	dnp = VTONFS(dvp);
   1480 	mtx_lock(&dnp->n_mtx);
   1481 	dnp->n_flag |= NMODIFIED;
   1482 	if (!dattrflag) {
   1483 		dnp->n_attrstamp = 0;
   1484 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
   1485 	}
   1486 	mtx_unlock(&dnp->n_mtx);
   1487 	return (error);
   1488 }
   1489 
   1490 /*
   1491  * nfs mknod vop
   1492  * just call nfs_mknodrpc() to do the work.
         *
         * The directory, result pointer, component name and attributes are
         * taken from ap and passed through unchanged; the return value is
         * that of nfs_mknodrpc().
   1493  */
   1494 /* ARGSUSED */
   1495 static int
   1496 nfs_mknod(struct vop_mknod_args *ap)
   1497 {
   1498 	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
   1499 }
   1500 
        /* Serializes access to the static state inside nfs_get_cverf(). */
   1501 static struct mtx nfs_cverf_mtx;
   1502 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex",
   1503     MTX_DEF);
   1504 
        /*
         * Return a create verifier.
         *
         * The first call seeds the static verifier from two arc4random()
         * values; every later call increments its qval member, so successive
         * calls return different values.  The update and the copy that is
         * returned are both made while nfs_cverf_mtx is held.
         */
   1505 static nfsquad_t
   1506 nfs_get_cverf(void)
   1507 {
   1508 	static nfsquad_t cverf;
   1509 	nfsquad_t ret;
   1510 	static int cverf_initialized = 0;
   1511 
   1512 	mtx_lock(&nfs_cverf_mtx);
   1513 	if (cverf_initialized == 0) {
   1514 		cverf.lval[0] = arc4random();
   1515 		cverf.lval[1] = arc4random();
   1516 		cverf_initialized = 1;
   1517 	} else
   1518 		cverf.qval++;
   1519 	ret = cverf;
   1520 	mtx_unlock(&nfs_cverf_mtx);
   1521 
   1522 	return (ret);
   1523 }
   1524 
   1525 /*
   1526  * nfs file create call
   1527  */
   1528 static int
   1529 nfs_create(struct vop_create_args *ap)
   1530 {
   1531 	struct vnode *dvp = ap->a_dvp;
   1532 	struct vattr *vap = ap->a_vap;
   1533 	struct componentname *cnp = ap->a_cnp;
   1534 	struct nfsnode *np = NULL, *dnp;
   1535 	struct vnode *newvp = NULL;
   1536 	struct nfsmount *nmp;
   1537 	struct nfsvattr dnfsva, nfsva;
   1538 	struct nfsfh *nfhp;
   1539 	nfsquad_t cverf;
   1540 	int error = 0, attrflag, dattrflag, fmode = 0;
   1541 	struct vattr vattr;
   1542 
   1543 	/*
   1544 	 * Oops, not for me..
   1545 	 */
   1546 	if (vap->va_type == VSOCK)
   1547 		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
   1548 
   1549 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
   1550 		return (error);
   1551 	if (vap->va_vaflags & VA_EXCLUSIVE)
   1552 		fmode |= O_EXCL;
   1553 	dnp = VTONFS(dvp);
   1554 	nmp = VFSTONFS(vnode_mount(dvp));
   1555 again:
   1556 	/* For NFSv4, wait until any remove is done. */
   1557 	mtx_lock(&dnp->n_mtx);
   1558 	while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
   1559 		dnp->n_flag |= NREMOVEWANT;
   1560 		(void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
   1561 	}
   1562 	mtx_unlock(&dnp->n_mtx);
   1563 
   1564 	cverf = nfs_get_cverf();
   1565 	error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
   1566 	    vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva,
   1567 	    &nfhp, &attrflag, &dattrflag, NULL);
   1568 	if (!error) {
   1569 		if (nfhp == NULL)
   1570 			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
   1571 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
   1572 			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
   1573 			    NULL);
   1574 		if (nfhp != NULL)
   1575 			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
   1576 			    cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
   1577 	}
   1578 	if (dattrflag)
   1579 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
   1580 	if (!error) {
   1581 		newvp = NFSTOV(np);
   1582 		if (attrflag == 0)
   1583 			error = nfsrpc_getattr(newvp, cnp->cn_cred,
   1584 			    cnp->cn_thread, &nfsva, NULL);
   1585 		if (error == 0)
   1586 			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
   1587 			    0, 1);
   1588 	}
   1589 	if (error) {
   1590 		if (newvp != NULL) {
   1591 			vput(newvp);
   1592 			newvp = NULL;
   1593 		}
   1594 		if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
   1595 		    error == NFSERR_NOTSUPP) {
   1596 			fmode &= ~O_EXCL;
   1597 			goto again;
   1598 		}
   1599 	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
   1600 		if (nfscl_checksattr(vap, &nfsva)) {
   1601 			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
   1602 			    cnp->cn_thread, &nfsva, &attrflag, NULL);
   1603 			if (error && (vap->va_uid != (uid_t)VNOVAL ||
   1604 			    vap->va_gid != (gid_t)VNOVAL)) {
   1605 				/* try again without setting uid/gid */
   1606 				vap->va_uid = (uid_t)VNOVAL;
   1607 				vap->va_gid = (uid_t)VNOVAL;
   1608 				error = nfsrpc_setattr(newvp, vap, NULL,
   1609 				    cnp->cn_cred, cnp->cn_thread, &nfsva,
   1610 				    &attrflag, NULL);
   1611 			}
   1612 			if (attrflag)
   1613 				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
   1614 				    NULL, 0, 1);
   1615 			if (error != 0)
   1616 				vput(newvp);
   1617 		}
   1618 	}
   1619 	if (!error) {
   1620 		if ((cnp->cn_flags & MAKEENTRY) && attrflag)
   1621 			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
   1622 			    NULL);
   1623 		*ap->a_vpp = newvp;
   1624 	} else if (NFS_ISV4(dvp)) {
   1625 		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
   1626 		    vap->va_gid);
   1627 	}
   1628 	mtx_lock(&dnp->n_mtx);
   1629 	dnp->n_flag |= NMODIFIED;
   1630 	if (!dattrflag) {
   1631 		dnp->n_attrstamp = 0;
   1632 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
   1633 	}
   1634 	mtx_unlock(&dnp->n_mtx);
   1635 	return (error);
   1636 }
   1637 
   1638 /*
   1639  * nfs file remove call
   1640  * To try and make nfs semantics closer to ufs semantics, a file that has
   1641  * other processes using the vnode is renamed instead of removed and then
   1642  * removed later on the last close.
   1643  * - If v_usecount > 1
   1644  *	  If a rename is not already in the works
   1645  *	     call nfs_sillyrename() to set it up
   1646  *     else
   1647  *	  do the remove rpc
         *
         * Directories are refused with EPERM.  The remove RPC is also used
         * for a vnode that is already silly-renamed when its link count is
         * greater than one.  A vnode that is in use, already silly-renamed
         * and does not meet that condition is left alone and 0 is returned.
         * The vnode's attribute cache is invalidated on every path.
   1648  */
   1649 static int
   1650 nfs_remove(struct vop_remove_args *ap)
   1651 {
   1652 	struct vnode *vp = ap->a_vp;
   1653 	struct vnode *dvp = ap->a_dvp;
   1654 	struct componentname *cnp = ap->a_cnp;
   1655 	struct nfsnode *np = VTONFS(vp);
   1656 	int error = 0;
   1657 	struct vattr vattr;
   1658 
   1659 	KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
   1660 	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
   1661 	if (vp->v_type == VDIR)
   1662 		error = EPERM;
   1663 	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
   1664 	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
   1665 	    vattr.va_nlink > 1)) {
   1666 		/*
   1667 		 * Purge the name cache so that the chance of a lookup for
   1668 		 * the name succeeding while the remove is in progress is
   1669 		 * minimized. Without node locking it can still happen, such
   1670 		 * that an I/O op returns ESTALE, but since you get this if
   1671 		 * another host removes the file..
   1672 		 */
   1673 		cache_purge(vp);
   1674 		/*
   1675 		 * throw away biocache buffers, mainly to avoid
   1676 		 * unnecessary delayed writes later.
   1677 		 */
   1678 		error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
   1679 		/* Do the rpc */
        		/* Only EINTR and EIO from the invalidate stop the remove. */
   1680 		if (error != EINTR && error != EIO)
   1681 			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
   1682 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
   1683 		/*
   1684 		 * Kludge City: If the first reply to the remove rpc is lost..
   1685 		 *   the reply to the retransmitted request will be ENOENT
   1686 		 *   since the file was in fact removed
   1687 		 *   Therefore, we cheat and return success.
   1688 		 */
   1689 		if (error == ENOENT)
   1690 			error = 0;
   1691 	} else if (!np->n_sillyrename)
   1692 		error = nfs_sillyrename(dvp, vp, cnp);
   1693 	mtx_lock(&np->n_mtx);
   1694 	np->n_attrstamp = 0;
   1695 	mtx_unlock(&np->n_mtx);
   1696 	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
   1697 	return (error);
   1698 }
   1699 
   1700 /*
   1701  * nfs file remove rpc called from nfs_inactive
         *
         * Removes the silly-renamed file described by sp, using the directory,
         * name and credentials saved in it.  No thread is passed to the RPC
         * (td is NULL).  Returns 0 without doing anything when the saved
         * directory vnode has type VBAD, else the result of nfs_removerpc().
   1702  */
   1703 int
   1704 ncl_removeit(struct sillyrename *sp, struct vnode *vp)
   1705 {
   1706 	/*
   1707 	 * Make sure that the directory vnode is still valid.
   1708 	 * XXX we should lock sp->s_dvp here.
   1709 	 */
   1710 	if (sp->s_dvp->v_type == VBAD)
   1711 		return (0);
   1712 	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
   1713 	    sp->s_cred, NULL));
   1714 }
   1715 
   1716 /*
   1717  * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
   1718  */
   1719 static int
   1720 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
   1721     int namelen, struct ucred *cred, struct thread *td)
   1722 {
   1723 	struct nfsvattr dnfsva;
   1724 	struct nfsnode *dnp = VTONFS(dvp);
   1725 	int error = 0, dattrflag;
   1726 
   1727 	mtx_lock(&dnp->n_mtx);
   1728 	dnp->n_flag |= NREMOVEINPROG;
   1729 	mtx_unlock(&dnp->n_mtx);
   1730 	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
   1731 	    &dattrflag, NULL);
   1732 	mtx_lock(&dnp->n_mtx);
   1733 	if ((dnp->n_flag & NREMOVEWANT)) {
   1734 		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
   1735 		mtx_unlock(&dnp->n_mtx);
   1736 		wakeup((caddr_t)dnp);
   1737 	} else {
   1738 		dnp->n_flag &= ~NREMOVEINPROG;
   1739 		mtx_unlock(&dnp->n_mtx);
   1740 	}
   1741 	if (dattrflag)
   1742 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
   1743 	mtx_lock(&dnp->n_mtx);
   1744 	dnp->n_flag |= NMODIFIED;
   1745 	if (!dattrflag) {
   1746 		dnp->n_attrstamp = 0;
   1747 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
   1748 	}
   1749 	mtx_unlock(&dnp->n_mtx);
   1750 	if (error && NFS_ISV4(dvp))
   1751 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
   1752 	return (error);
   1753 }
   1754 
   1755 /*
   1756  * nfs file rename call
   1757  */
   1758 static int
   1759 nfs_rename(struct vop_rename_args *ap)
   1760 {
   1761 	struct vnode *fvp = ap->a_fvp;
   1762 	struct vnode *tvp = ap->a_tvp;
   1763 	struct vnode *fdvp = ap->a_fdvp;
   1764 	struct vnode *tdvp = ap->a_tdvp;
   1765 	struct componentname *tcnp = ap->a_tcnp;
   1766 	struct componentname *fcnp = ap->a_fcnp;
   1767 	struct nfsnode *fnp = VTONFS(ap->a_fvp);
   1768 	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
   1769 	struct nfsv4node *newv4 = NULL;
   1770 	int error;
   1771 
   1772 	KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
   1773 	    (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
   1774 	/* Check for cross-device rename */
   1775 	if ((fvp->v_mount != tdvp->v_mount) ||
   1776 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
   1777 		error = EXDEV;
   1778 		goto out;
   1779 	}
   1780 
   1781 	if (fvp == tvp) {
   1782 		printf("nfs_rename: fvp == tvp (can't happen)\n");
   1783 		error = 0;
   1784 		goto out;
   1785 	}
   1786 	if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
   1787 		goto out;
   1788 
   1789 	/*
   1790 	 * We have to flush B_DELWRI data prior to renaming
   1791 	 * the file.  If we don't, the delayed-write buffers
   1792 	 * can be flushed out later after the file has gone stale
   1793 	 * under NFSV3.  NFSV2 does not have this problem because
   1794 	 * ( as far as I can tell ) it flushes dirty buffers more
   1795 	 * often.
   1796 	 *
   1797 	 * Skip the rename operation if the fsync fails, this can happen
   1798 	 * due to the server's volume being full, when we pushed out data
   1799 	 * that was written back to our cache earlier. Not checking for
   1800 	 * this condition can result in potential (silent) data loss.
   1801 	 */
   1802 	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
   1803 	NFSVOPUNLOCK(fvp, 0);
   1804 	if (!error && tvp)
   1805 		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
   1806 	if (error)
   1807 		goto out;
   1808 
   1809 	/*
   1810 	 * If the tvp exists and is in use, sillyrename it before doing the
   1811 	 * rename of the new file over it.
   1812 	 * XXX Can't sillyrename a directory.
   1813 	 */
   1814 	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
   1815 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
   1816 		vput(tvp);
   1817 		tvp = NULL;
   1818 	}
   1819 
   1820 	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
   1821 	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
   1822 	    tcnp->cn_thread);
   1823 
   1824 	if (error == 0 && NFS_ISV4(tdvp)) {
   1825 		/*
   1826 		 * For NFSv4, check to see if it is the same name and
   1827 		 * replace the name, if it is different.
   1828 		 */
   1829 		MALLOC(newv4, struct nfsv4node *,
   1830 		    sizeof (struct nfsv4node) +
   1831 		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
   1832 		    M_NFSV4NODE, M_WAITOK);
   1833 		mtx_lock(&tdnp->n_mtx);
   1834 		mtx_lock(&fnp->n_mtx);
   1835 		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
   1836 		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
   1837 		      NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
   1838 		      tcnp->cn_namelen) ||
   1839 		      tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
   1840 		      NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
   1841 			tdnp->n_fhp->nfh_len))) {
   1842 #ifdef notdef
   1843 { char nnn[100]; int nnnl;
   1844 nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
   1845 bcopy(tcnp->cn_nameptr, nnn, nnnl);
   1846 nnn[nnnl] = '\0';
   1847 printf("ren replace=%s\n",nnn);
   1848 }
   1849 #endif
   1850 			FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
   1851 			fnp->n_v4 = newv4;
   1852 			newv4 = NULL;
   1853 			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
   1854 			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
   1855 			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
   1856 			    tdnp->n_fhp->nfh_len);
   1857 			NFSBCOPY(tcnp->cn_nameptr,
   1858 			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
   1859 		}
   1860 		mtx_unlock(&tdnp->n_mtx);
   1861 		mtx_unlock(&fnp->n_mtx);
   1862 		if (newv4 != NULL)
   1863 			FREE((caddr_t)newv4, M_NFSV4NODE);
   1864 	}
   1865 
   1866 	if (fvp->v_type == VDIR) {
   1867 		if (tvp != NULL && tvp->v_type == VDIR)
   1868 			cache_purge(tdvp);
   1869 		cache_purge(fdvp);
   1870 	}
   1871 
   1872 out:
   1873 	if (tdvp == tvp)
   1874 		vrele(tdvp);
   1875 	else
   1876 		vput(tdvp);
   1877 	if (tvp)
   1878 		vput(tvp);
   1879 	vrele(fdvp);
   1880 	vrele(fvp);
   1881 	/*
   1882 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
   1883 	 */
   1884 	if (error == ENOENT)
   1885 		error = 0;
   1886 	return (error);
   1887 }
   1888 
   1889 /*
   1890  * nfs file rename rpc called from nfs_remove() above
   1891  */
   1892 static int
   1893 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
   1894     struct sillyrename *sp)
   1895 {
   1896 
   1897 	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
   1898 	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
   1899 	    scnp->cn_thread));
   1900 }
   1901 
   1902 /*
   1903  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
   1904  */
   1905 static int
   1906 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
   1907     int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
   1908     int tnamelen, struct ucred *cred, struct thread *td)
   1909 {
   1910 	struct nfsvattr fnfsva, tnfsva;
   1911 	struct nfsnode *fdnp = VTONFS(fdvp);
   1912 	struct nfsnode *tdnp = VTONFS(tdvp);
   1913 	int error = 0, fattrflag, tattrflag;
   1914 
   1915 	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
   1916 	    tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
   1917 	    &tattrflag, NULL, NULL);
   1918 	mtx_lock(&fdnp->n_mtx);
   1919 	fdnp->n_flag |= NMODIFIED;
   1920 	if (fattrflag != 0) {
   1921 		mtx_unlock(&fdnp->n_mtx);
   1922 		(void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1);
   1923 	} else {
   1924 		fdnp->n_attrstamp = 0;
   1925 		mtx_unlock(&fdnp->n_mtx);
   1926 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
   1927 	}
   1928 	mtx_lock(&tdnp->n_mtx);
   1929 	tdnp->n_flag |= NMODIFIED;
   1930 	if (tattrflag != 0) {
   1931 		mtx_unlock(&tdnp->n_mtx);
   1932 		(void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1);
   1933 	} else {
   1934 		tdnp->n_attrstamp = 0;
   1935 		mtx_unlock(&tdnp->n_mtx);
   1936 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
   1937 	}
   1938 	if (error && NFS_ISV4(fdvp))
   1939 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
   1940 	return (error);
   1941 }
   1942 
   1943 /*
   1944  * nfs hard link create call
   1945  */
   1946 static int
   1947 nfs_link(struct vop_link_args *ap)
   1948 {
   1949 	struct vnode *vp = ap->a_vp;
   1950 	struct vnode *tdvp = ap->a_tdvp;
   1951 	struct componentname *cnp = ap->a_cnp;
   1952 	struct nfsnode *np, *tdnp;
   1953 	struct nfsvattr nfsva, dnfsva;
   1954 	int error = 0, attrflag, dattrflag;
   1955 
   1956 	/*
   1957 	 * Push all writes to the server, so that the attribute cache
   1958 	 * doesn't get "out of sync" with the server.
   1959 	 * XXX There should be a better way!
   1960 	 */
   1961 	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
   1962 
   1963 	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
   1964 	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag,
   1965 	    &dattrflag, NULL);
   1966 	tdnp = VTONFS(tdvp);
   1967 	mtx_lock(&tdnp->n_mtx);
   1968 	tdnp->n_flag |= NMODIFIED;
   1969 	if (dattrflag != 0) {
   1970 		mtx_unlock(&tdnp->n_mtx);
   1971 		(void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1);
   1972 	} else {
   1973 		tdnp->n_attrstamp = 0;
   1974 		mtx_unlock(&tdnp->n_mtx);
   1975 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
   1976 	}
   1977 	if (attrflag)
   1978 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
   1979 	else {
   1980 		np = VTONFS(vp);
   1981 		mtx_lock(&np->n_mtx);
   1982 		np->n_attrstamp = 0;
   1983 		mtx_unlock(&np->n_mtx);
   1984 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
   1985 	}
   1986 	/*
   1987 	 * If negative lookup caching is enabled, I might as well
   1988 	 * add an entry for this node. Not necessary for correctness,
   1989 	 * but if negative caching is enabled, then the system
   1990 	 * must care about lookup caching hit rate, so...
   1991 	 */
   1992 	if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
   1993 	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
   1994 		cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL);
   1995 	}
   1996 	if (error && NFS_ISV4(vp))
   1997 		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
   1998 		    (gid_t)0);
   1999 	return (error);
   2000 }
   2001 
   2002 /*
   2003  * nfs symbolic link create call
   2004  */
   2005 static int
   2006 nfs_symlink(struct vop_symlink_args *ap)
   2007 {
   2008 	struct vnode *dvp = ap->a_dvp;
   2009 	struct vattr *vap = ap->a_vap;
   2010 	struct componentname *cnp = ap->a_cnp;
   2011 	struct nfsvattr nfsva, dnfsva;
   2012 	struct nfsfh *nfhp;
   2013 	struct nfsnode *np = NULL, *dnp;
   2014 	struct vnode *newvp = NULL;
   2015 	int error = 0, attrflag, dattrflag, ret;
   2016 
   2017 	vap->va_type = VLNK;
   2018 	error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
   2019 	    ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva,
   2020 	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
   2021 	if (nfhp) {
   2022 		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
   2023 		    &np, NULL, LK_EXCLUSIVE);
   2024 		if (!ret)
   2025 			newvp = NFSTOV(np);
   2026 		else if (!error)
   2027 			error = ret;
   2028 	}
   2029 	if (newvp != NULL) {
   2030 		if (attrflag)
   2031 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
   2032 			    0, 1);
   2033 	} else if (!error) {
   2034 		/*
   2035 		 * If we do not have an error and we could not extract the
   2036 		 * newvp from the response due to the request being NFSv2, we
   2037 		 * have to do a lookup in order to obtain a newvp to return.
   2038 		 */
   2039 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
   2040 		    cnp->cn_cred, cnp->cn_thread, &np);
   2041 		if (!error)
   2042 			newvp = NFSTOV(np);
   2043 	}
   2044 	if (error) {
   2045 		if (newvp)
   2046 			vput(newvp);
   2047 		if (NFS_ISV4(dvp))
   2048 			error = nfscl_maperr(cnp->cn_thread, error,
   2049 			    vap->va_uid, vap->va_gid);
   2050 	} else {
   2051 		*ap->a_vpp = newvp;
   2052 	}
   2053 
   2054 	dnp = VTONFS(dvp);
   2055 	mtx_lock(&dnp->n_mtx);
   2056 	dnp->n_flag |= NMODIFIED;
   2057 	if (dattrflag != 0) {
   2058 		mtx_unlock(&dnp->n_mtx);
   2059 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
   2060 	} else {
   2061 		dnp->n_attrstamp = 0;
   2062 		mtx_unlock(&dnp->n_mtx);
   2063 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
   2064 	}
   2065 	/*
   2066 	 * If negative lookup caching is enabled, I might as well
   2067 	 * add an entry for this node. Not necessary for correctness,
   2068 	 * but if negative caching is enabled, then the system
   2069 	 * must care about lookup caching hit rate, so...
   2070 	 */
   2071 	if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
   2072 	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
   2073 		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL);
   2074 	}
   2075 	return (error);
   2076 }
   2077 
   2078 /*
   2079  * nfs make dir call
   2080  */
   2081 static int
   2082 nfs_mkdir(struct vop_mkdir_args *ap)
   2083 {
   2084 	struct vnode *dvp = ap->a_dvp;
   2085 	struct vattr *vap = ap->a_vap;
   2086 	struct componentname *cnp = ap->a_cnp;
   2087 	struct nfsnode *np = NULL, *dnp;
   2088 	struct vnode *newvp = NULL;
   2089 	struct vattr vattr;
   2090 	struct nfsfh *nfhp;
   2091 	struct nfsvattr nfsva, dnfsva;
   2092 	int error = 0, attrflag, dattrflag, ret;
   2093 
   2094 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
   2095 		return (error);
   2096 	vap->va_type = VDIR;
   2097 	error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
   2098 	    vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp,
   2099 	    &attrflag, &dattrflag, NULL);
   2100 	dnp = VTONFS(dvp);
   2101 	mtx_lock(&dnp->n_mtx);
   2102 	dnp->n_flag |= NMODIFIED;
   2103 	if (dattrflag != 0) {
   2104 		mtx_unlock(&dnp->n_mtx);
   2105 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
   2106 	} else {
   2107 		dnp->n_attrstamp = 0;
   2108 		mtx_unlock(&dnp->n_mtx);
   2109 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
   2110 	}
   2111 	if (nfhp) {
   2112 		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
   2113 		    &np, NULL, LK_EXCLUSIVE);
   2114 		if (!ret) {
   2115 			newvp = NFSTOV(np);
   2116 			if (attrflag)
   2117 			   (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
   2118 				NULL, 0, 1);
   2119 		} else if (!error)
   2120 			error = ret;
   2121 	}
   2122 	if (!error && newvp == NULL) {
   2123 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
   2124 		    cnp->cn_cred, cnp->cn_thread, &np);
   2125 		if (!error) {
   2126 			newvp = NFSTOV(np);
   2127 			if (newvp->v_type != VDIR)
   2128 				error = EEXIST;
   2129 		}
   2130 	}
   2131 	if (error) {
   2132 		if (newvp)
   2133 			vput(newvp);
   2134 		if (NFS_ISV4(dvp))
   2135 			error = nfscl_maperr(cnp->cn_thread, error,
   2136 			    vap->va_uid, vap->va_gid);
   2137 	} else {
   2138 		/*
   2139 		 * If negative lookup caching is enabled, I might as well
   2140 		 * add an entry for this node. Not necessary for correctness,
   2141 		 * but if negative caching is enabled, then the system
   2142 		 * must care about lookup caching hit rate, so...
   2143 		 */
   2144 		if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
   2145 		    (cnp->cn_flags & MAKEENTRY) &&
   2146 		    attrflag != 0 && dattrflag != 0)
   2147 			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
   2148 			    &dnfsva.na_ctime);
   2149 		*ap->a_vpp = newvp;
   2150 	}
   2151 	return (error);
   2152 }
   2153 
   2154 /*
   2155  * nfs remove directory call
   2156  */
   2157 static int
   2158 nfs_rmdir(struct vop_rmdir_args *ap)
   2159 {
   2160 	struct vnode *vp = ap->a_vp;
   2161 	struct vnode *dvp = ap->a_dvp;
   2162 	struct componentname *cnp = ap->a_cnp;
   2163 	struct nfsnode *dnp;
   2164 	struct nfsvattr dnfsva;
   2165 	int error, dattrflag;
   2166 
   2167 	if (dvp == vp)
   2168 		return (EINVAL);
   2169 	error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
   2170 	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL);
   2171 	dnp = VTONFS(dvp);
   2172 	mtx_lock(&dnp->n_mtx);
   2173 	dnp->n_flag |= NMODIFIED;
   2174 	if (dattrflag != 0) {
   2175 		mtx_unlock(&dnp->n_mtx);
   2176 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
   2177 	} else {
   2178 		dnp->n_attrstamp = 0;
   2179 		mtx_unlock(&dnp->n_mtx);
   2180 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
   2181 	}
   2182 
   2183 	cache_purge(dvp);
   2184 	cache_purge(vp);
   2185 	if (error && NFS_ISV4(dvp))
   2186 		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
   2187 		    (gid_t)0);
   2188 	/*
   2189 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
   2190 	 */
   2191 	if (error == ENOENT)
   2192 		error = 0;
   2193 	return (error);
   2194 }
   2195 
   2196 /*
   2197  * nfs readdir call
   2198  */
   2199 static int
   2200 nfs_readdir(struct vop_readdir_args *ap)
   2201 {
   2202 	struct vnode *vp = ap->a_vp;
   2203 	struct nfsnode *np = VTONFS(vp);
   2204 	struct uio *uio = ap->a_uio;
   2205 	ssize_t tresid, left;
   2206 	int error = 0;
   2207 	struct vattr vattr;
   2208 
   2209 	if (ap->a_eofflag != NULL)
   2210 		*ap->a_eofflag = 0;
   2211 	if (vp->v_type != VDIR)
   2212 		return(EPERM);
   2213 
   2214 	/*
   2215 	 * First, check for hit on the EOF offset cache
   2216 	 */
   2217 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
   2218 	    (np->n_flag & NMODIFIED) == 0) {
   2219 		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
   2220 			mtx_lock(&np->n_mtx);
   2221 			if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
   2222 			    !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
   2223 				mtx_unlock(&np->n_mtx);
   2224 				NFSINCRGLOBAL(nfsstatsv1.direofcache_hits);
   2225 				if (ap->a_eofflag != NULL)
   2226 					*ap->a_eofflag = 1;
   2227 				return (0);
   2228 			} else
   2229 				mtx_unlock(&np->n_mtx);
   2230 		}
   2231 	}
   2232 
   2233 	/*
   2234 	 * NFS always guarantees that directory entries don't straddle
   2235 	 * DIRBLKSIZ boundaries.  As such, we need to limit the size
   2236 	 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial
   2237 	 * directory entry.
   2238 	 */
   2239 	left = uio->uio_resid % DIRBLKSIZ;
   2240 	if (left == uio->uio_resid)
   2241 		return (EINVAL);
   2242 	uio->uio_resid -= left;
   2243 
   2244 	/*
   2245 	 * Call ncl_bioread() to do the real work.
   2246 	 */
   2247 	tresid = uio->uio_resid;
   2248 	error = ncl_bioread(vp, uio, 0, ap->a_cred);
   2249 
   2250 	if (!error && uio->uio_resid == tresid) {
   2251 		NFSINCRGLOBAL(nfsstatsv1.direofcache_misses);
   2252 		if (ap->a_eofflag != NULL)
   2253 			*ap->a_eofflag = 1;
   2254 	}
   2255 
   2256 	/* Add the partial DIRBLKSIZ (left) back in. */
   2257 	uio->uio_resid += left;
   2258 	return (error);
   2259 }
   2260 
   2261 /*
   2262  * Readdir rpc call.
   2263  * Called from below the buffer cache by ncl_doio().
   2264  */
   2265 int
   2266 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
   2267     struct thread *td)
   2268 {
   2269 	struct nfsvattr nfsva;
   2270 	nfsuint64 *cookiep, cookie;
   2271 	struct nfsnode *dnp = VTONFS(vp);
   2272 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
   2273 	int error = 0, eof, attrflag;
   2274 
   2275 	KASSERT(uiop->uio_iovcnt == 1 &&
   2276 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
   2277 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
   2278 	    ("nfs readdirrpc bad uio"));
   2279 
   2280 	/*
   2281 	 * If there is no cookie, assume directory was stale.
   2282 	 */
   2283 	ncl_dircookie_lock(dnp);
   2284 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
   2285 	if (cookiep) {
   2286 		cookie = *cookiep;
   2287 		ncl_dircookie_unlock(dnp);
   2288 	} else {
   2289 		ncl_dircookie_unlock(dnp);
   2290 		return (NFSERR_BAD_COOKIE);
   2291 	}
   2292 
   2293 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
   2294 		(void)ncl_fsinfo(nmp, vp, cred, td);
   2295 
   2296 	error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
   2297 	    &attrflag, &eof, NULL);
   2298 	if (attrflag)
   2299 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
   2300 
   2301 	if (!error) {
   2302 		/*
   2303 		 * We are now either at the end of the directory or have filled
   2304 		 * the block.
   2305 		 */
   2306 		if (eof)
   2307 			dnp->n_direofoffset = uiop->uio_offset;
   2308 		else {
   2309 			if (uiop->uio_resid > 0)
   2310 				printf("EEK! readdirrpc resid > 0\n");
   2311 			ncl_dircookie_lock(dnp);
   2312 			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
   2313 			*cookiep = cookie;
   2314 			ncl_dircookie_unlock(dnp);
   2315 		}
   2316 	} else if (NFS_ISV4(vp)) {
   2317 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
   2318 	}
   2319 	return (error);
   2320 }
   2321 
   2322 /*
   2323  * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc().
   2324  */
   2325 int
   2326 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
   2327     struct thread *td)
   2328 {
   2329 	struct nfsvattr nfsva;
   2330 	nfsuint64 *cookiep, cookie;
   2331 	struct nfsnode *dnp = VTONFS(vp);
   2332 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
   2333 	int error = 0, attrflag, eof;
   2334 
   2335 	KASSERT(uiop->uio_iovcnt == 1 &&
   2336 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
   2337 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
   2338 	    ("nfs readdirplusrpc bad uio"));
   2339 
   2340 	/*
   2341 	 * If there is no cookie, assume directory was stale.
   2342 	 */
   2343 	ncl_dircookie_lock(dnp);
   2344 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
   2345 	if (cookiep) {
   2346 		cookie = *cookiep;
   2347 		ncl_dircookie_unlock(dnp);
   2348 	} else {
   2349 		ncl_dircookie_unlock(dnp);
   2350 		return (NFSERR_BAD_COOKIE);
   2351 	}
   2352 
   2353 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
   2354 		(void)ncl_fsinfo(nmp, vp, cred, td);
   2355 	error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
   2356 	    &attrflag, &eof, NULL);
   2357 	if (attrflag)
   2358 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
   2359 
   2360 	if (!error) {
   2361 		/*
   2362 		 * We are now either at end of the directory or have filled the
   2363 		 * the block.
   2364 		 */
   2365 		if (eof)
   2366 			dnp->n_direofoffset = uiop->uio_offset;
   2367 		else {
   2368 			if (uiop->uio_resid > 0)
   2369 				printf("EEK! readdirplusrpc resid > 0\n");
   2370 			ncl_dircookie_lock(dnp);
   2371 			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
   2372 			*cookiep = cookie;
   2373 			ncl_dircookie_unlock(dnp);
   2374 		}
   2375 	} else if (NFS_ISV4(vp)) {
   2376 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
   2377 	}
   2378 	return (error);
   2379 }
   2380 
   2381 /*
   2382  * Silly rename. To make the NFS filesystem that is stateless look a little
   2383  * more like the "ufs" a remove of an active vnode is translated to a rename
   2384  * to a funny looking filename that is removed by nfs_inactive on the
   2385  * nfsnode. There is the potential for another process on a different client
   2386  * to create the same funny name between the nfs_lookitup() fails and the
   2387  * nfs_rename() completes, but...
   2388  */
   2389 static int
   2390 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
   2391 {
   2392 	struct sillyrename *sp;
   2393 	struct nfsnode *np;
   2394 	int error;
   2395 	short pid;
   2396 	unsigned int lticks;
   2397 
   2398 	cache_purge(dvp);
   2399 	np = VTONFS(vp);
   2400 	KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
   2401 	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
   2402 	    M_NEWNFSREQ, M_WAITOK);
   2403 	sp->s_cred = crhold(cnp->cn_cred);
   2404 	sp->s_dvp = dvp;
   2405 	VREF(dvp);
   2406 
   2407 	/*
   2408 	 * Fudge together a funny name.
   2409 	 * Changing the format of the funny name to accommodate more
   2410 	 * sillynames per directory.
   2411 	 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is
   2412 	 * CPU ticks since boot.
   2413 	 */
   2414 	pid = cnp->cn_thread->td_proc->p_pid;
   2415 	lticks = (unsigned int)ticks;
   2416 	for ( ; ; ) {
   2417 		sp->s_namlen = snprintf(sp->s_name, sizeof(sp->s_name),
   2418 				       ".nfs.%08x.%04x4.4", lticks,
   2419 				       pid);
   2420 		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
   2421 				 cnp->cn_thread, NULL))
   2422 			break;
   2423 		lticks++;
   2424 	}
   2425 	error = nfs_renameit(dvp, vp, cnp, sp);
   2426 	if (error)
   2427 		goto bad;
   2428 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
   2429 		cnp->cn_thread, &np);
   2430 	np->n_sillyrename = sp;
   2431 	return (0);
   2432 bad:
   2433 	vrele(sp->s_dvp);
   2434 	crfree(sp->s_cred);
   2435 	free((caddr_t)sp, M_NEWNFSREQ);
   2436 	return (error);
   2437 }
   2438 
   2439 /*
   2440  * Look up a file name and optionally either update the file handle or
   2441  * allocate an nfsnode, depending on the value of npp.
   2442  * npp == NULL	--> just do the lookup
   2443  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
   2444  *			handled too
   2445  * *npp != NULL --> update the file handle in the vnode
   2446  */
   2447 static int
   2448 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
   2449     struct thread *td, struct nfsnode **npp)
   2450 {
   2451 	struct vnode *newvp = NULL, *vp;
   2452 	struct nfsnode *np, *dnp = VTONFS(dvp);
   2453 	struct nfsfh *nfhp, *onfhp;
   2454 	struct nfsvattr nfsva, dnfsva;
   2455 	struct componentname cn;
   2456 	int error = 0, attrflag, dattrflag;
   2457 	u_int hash;
   2458 
   2459 	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
   2460 	    &nfhp, &attrflag, &dattrflag, NULL);
   2461 	if (dattrflag)
   2462 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
   2463 	if (npp && !error) {
   2464 		if (*npp != NULL) {
   2465 		    np = *npp;
   2466 		    vp = NFSTOV(np);
   2467 		    /*
   2468 		     * For NFSv4, check to see if it is the same name and
   2469 		     * replace the name, if it is different.
   2470 		     */
   2471 		    if (np->n_v4 != NULL && nfsva.na_type == VREG &&
   2472 			(np->n_v4->n4_namelen != len ||
   2473 			 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
   2474 			 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
   2475 			 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
   2476 			 dnp->n_fhp->nfh_len))) {
   2477 #ifdef notdef
   2478 { char nnn[100]; int nnnl;
   2479 nnnl = (len < 100) ? len : 99;
   2480 bcopy(name, nnn, nnnl);
   2481 nnn[nnnl] = '\0';
   2482 printf("replace=%s\n",nnn);
   2483 }
   2484 #endif
   2485 			    FREE((caddr_t)np->n_v4, M_NFSV4NODE);
   2486 			    MALLOC(np->n_v4, struct nfsv4node *,
   2487 				sizeof (struct nfsv4node) +
   2488 				dnp->n_fhp->nfh_len + len - 1,
   2489 				M_NFSV4NODE, M_WAITOK);
   2490 			    np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
   2491 			    np->n_v4->n4_namelen = len;
   2492 			    NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
   2493 				dnp->n_fhp->nfh_len);
   2494 			    NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
   2495 		    }
   2496 		    hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
   2497 			FNV1_32_INIT);
   2498 		    onfhp = np->n_fhp;
   2499 		    /*
   2500 		     * Rehash node for new file handle.
   2501 		     */
   2502 		    vfs_hash_rehash(vp, hash);
   2503 		    np->n_fhp = nfhp;
   2504 		    if (onfhp != NULL)
   2505 			FREE((caddr_t)onfhp, M_NFSFH);
   2506 		    newvp = NFSTOV(np);
   2507 		} else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
   2508 		    FREE((caddr_t)nfhp, M_NFSFH);
   2509 		    VREF(dvp);
   2510 		    newvp = dvp;
   2511 		} else {
   2512 		    cn.cn_nameptr = name;
   2513 		    cn.cn_namelen = len;
   2514 		    error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
   2515 			&np, NULL, LK_EXCLUSIVE);
   2516 		    if (error)
   2517 			return (error);
   2518 		    newvp = NFSTOV(np);
   2519 		}
   2520 		if (!attrflag && *npp == NULL) {
   2521 			if (newvp == dvp)
   2522 				vrele(newvp);
   2523 			else
   2524 				vput(newvp);
   2525 			return (ENOENT);
   2526 		}
   2527 		if (attrflag)
   2528 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
   2529 			    0, 1);
   2530 	}
   2531 	if (npp && *npp == NULL) {
   2532 		if (error) {
   2533 			if (newvp) {
   2534 				if (newvp == dvp)
   2535 					vrele(newvp);
   2536 				else
   2537 					vput(newvp);
   2538 			}
   2539 		} else
   2540 			*npp = np;
   2541 	}
   2542 	if (error && NFS_ISV4(dvp))
   2543 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
   2544 	return (error);
   2545 }
   2546 
   2547 /*
   2548  * Nfs Version 3 and 4 commit rpc
   2549  */
   2550 int
   2551 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
   2552    struct thread *td)
   2553 {
   2554 	struct nfsvattr nfsva;
   2555 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
   2556 	int error, attrflag;
   2557 
   2558 	mtx_lock(&nmp->nm_mtx);
   2559 	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
   2560 		mtx_unlock(&nmp->nm_mtx);
   2561 		return (0);
   2562 	}
   2563 	mtx_unlock(&nmp->nm_mtx);
   2564 	error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva,
   2565 	    &attrflag, NULL);
   2566 	if (attrflag != 0)
   2567 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
   2568 		    0, 1);
   2569 	if (error != 0 && NFS_ISV4(vp))
   2570 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
   2571 	return (error);
   2572 }
   2573 
   2574 /*
   2575  * Strategy routine.
   2576  * For async requests when nfsiod(s) are running, queue the request by
   2577  * calling ncl_asyncio(), otherwise just all ncl_doio() to do the
   2578  * request.
   2579  */
   2580 static int
   2581 nfs_strategy(struct vop_strategy_args *ap)
   2582 {
   2583 	struct buf *bp = ap->a_bp;
   2584 	struct ucred *cr;
   2585 
   2586 	KASSERT(!(bp->b_flags & B_DONE),
   2587 	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
   2588 	BUF_ASSERT_HELD(bp);
   2589 
   2590 	if (bp->b_iocmd == BIO_READ)
   2591 		cr = bp->b_rcred;
   2592 	else
   2593 		cr = bp->b_wcred;
   2594 
   2595 	/*
   2596 	 * If the op is asynchronous and an i/o daemon is waiting
   2597 	 * queue the request, wake it up and wait for completion
   2598 	 * otherwise just do it ourselves.
   2599 	 */
   2600 	if ((bp->b_flags & B_ASYNC) == 0 ||
   2601 	    ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
   2602 		(void) ncl_doio(ap->a_vp, bp, cr, curthread, 1);
   2603 	return (0);
   2604 }
   2605 
   2606 /*
   2607  * fsync vnode op. Just call ncl_flush() with commit == 1.
   2608  */
   2609 /* ARGSUSED */
   2610 static int
   2611 nfs_fsync(struct vop_fsync_args *ap)
   2612 {
   2613 
   2614 	if (ap->a_vp->v_type != VREG) {
   2615 		/*
   2616 		 * For NFS, metadata is changed synchronously on the server,
   2617 		 * so there is nothing to flush. Also, ncl_flush() clears
   2618 		 * the NMODIFIED flag and that shouldn't be done here for
   2619 		 * directories.
   2620 		 */
   2621 		return (0);
   2622 	}
   2623 	return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0));
   2624 }
   2625 
   2626 /*
   2627  * Flush all the blocks associated with a vnode.
   2628  * 	Walk through the buffer pool and push any dirty pages
   2629  *	associated with the vnode.
   2630  * If the called_from_renewthread argument is TRUE, it has been called
   2631  * from the NFSv4 renew thread and, as such, cannot block indefinitely
   2632  * waiting for a buffer write to complete.
   2633  */
   2634 int
   2635 ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
   2636     int commit, int called_from_renewthread)
   2637 {
   2638 	struct nfsnode *np = VTONFS(vp);
   2639 	struct buf *bp;
   2640 	int i;
   2641 	struct buf *nbp;
   2642 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
   2643 	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
   2644 	int passone = 1, trycnt = 0;
   2645 	u_quad_t off, endoff, toff;
   2646 	struct ucred* wcred = NULL;
   2647 	struct buf **bvec = NULL;
   2648 	struct bufobj *bo;
   2649 #ifndef NFS_COMMITBVECSIZ
   2650 #define	NFS_COMMITBVECSIZ	20
   2651 #endif
   2652 	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
   2653 	int bvecsize = 0, bveccount;
   2654 
   2655 	if (called_from_renewthread != 0)
   2656 		slptimeo = hz;
   2657 	if (nmp->nm_flag & NFSMNT_INT)
   2658 		slpflag = PCATCH;
   2659 	if (!commit)
   2660 		passone = 0;
   2661 	bo = &vp->v_bufobj;
   2662 	/*
   2663 	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
   2664 	 * server, but has not been committed to stable storage on the server
   2665 	 * yet. On the first pass, the byte range is worked out and the commit
   2666 	 * rpc is done. On the second pass, ncl_writebp() is called to do the
   2667 	 * job.
   2668 	 */
   2669 again:
   2670 	off = (u_quad_t)-1;
   2671 	endoff = 0;
   2672 	bvecpos = 0;
   2673 	if (NFS_ISV34(vp) && commit) {
   2674 		if (bvec != NULL && bvec != bvec_on_stack)
   2675 			free(bvec, M_TEMP);
   2676 		/*
   2677 		 * Count up how many buffers waiting for a commit.
   2678 		 */
   2679 		bveccount = 0;
   2680 		BO_LOCK(bo);
   2681 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
   2682 			if (!BUF_ISLOCKED(bp) &&
   2683 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
   2684 				== (B_DELWRI | B_NEEDCOMMIT))
   2685 				bveccount++;
   2686 		}
   2687 		/*
   2688 		 * Allocate space to remember the list of bufs to commit.  It is
   2689 		 * important to use M_NOWAIT here to avoid a race with nfs_write.
   2690 		 * If we can't get memory (for whatever reason), we will end up
   2691 		 * committing the buffers one-by-one in the loop below.
   2692 		 */
   2693 		if (bveccount > NFS_COMMITBVECSIZ) {
   2694 			/*
   2695 			 * Release the vnode interlock to avoid a lock
   2696 			 * order reversal.
   2697 			 */
   2698 			BO_UNLOCK(bo);
   2699 			bvec = (struct buf **)
   2700 				malloc(bveccount * sizeof(struct buf *),
   2701 				       M_TEMP, M_NOWAIT);
   2702 			BO_LOCK(bo);
   2703 			if (bvec == NULL) {
   2704 				bvec = bvec_on_stack;
   2705 				bvecsize = NFS_COMMITBVECSIZ;
   2706 			} else
   2707 				bvecsize = bveccount;
   2708 		} else {
   2709 			bvec = bvec_on_stack;
   2710 			bvecsize = NFS_COMMITBVECSIZ;
   2711 		}
   2712 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
   2713 			if (bvecpos >= bvecsize)
   2714 				break;
   2715 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
   2716 				nbp = TAILQ_NEXT(bp, b_bobufs);
   2717 				continue;
   2718 			}
   2719 			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
   2720 			    (B_DELWRI | B_NEEDCOMMIT)) {
   2721 				BUF_UNLOCK(bp);
   2722 				nbp = TAILQ_NEXT(bp, b_bobufs);
   2723 				continue;
   2724 			}
   2725 			BO_UNLOCK(bo);
   2726 			bremfree(bp);
   2727 			/*
   2728 			 * Work out if all buffers are using the same cred
   2729 			 * so we can deal with them all with one commit.
   2730 			 *
   2731 			 * NOTE: we are not clearing B_DONE here, so we have
   2732 			 * to do it later on in this routine if we intend to
   2733 			 * initiate I/O on the bp.
   2734 			 *
   2735 			 * Note: to avoid loopback deadlocks, we do not
   2736 			 * assign b_runningbufspace.
   2737 			 */
   2738 			if (wcred == NULL)
   2739 				wcred = bp->b_wcred;
   2740 			else if (wcred != bp->b_wcred)
   2741 				wcred = NOCRED;
   2742 			vfs_busy_pages(bp, 1);
   2743 
   2744 			BO_LOCK(bo);
   2745 			/*
   2746 			 * bp is protected by being locked, but nbp is not
   2747 			 * and vfs_busy_pages() may sleep.  We have to
   2748 			 * recalculate nbp.
   2749 			 */
   2750 			nbp = TAILQ_NEXT(bp, b_bobufs);
   2751 
   2752 			/*
   2753 			 * A list of these buffers is kept so that the
   2754 			 * second loop knows which buffers have actually
   2755 			 * been committed. This is necessary, since there
   2756 			 * may be a race between the commit rpc and new
   2757 			 * uncommitted writes on the file.
   2758 			 */
   2759 			bvec[bvecpos++] = bp;
   2760 			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
   2761 				bp->b_dirtyoff;
   2762 			if (toff < off)
   2763 				off = toff;
   2764 			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
   2765 			if (toff > endoff)
   2766 				endoff = toff;
   2767 		}
   2768 		BO_UNLOCK(bo);
   2769 	}
   2770 	if (bvecpos > 0) {
   2771 		/*
   2772 		 * Commit data on the server, as required.
   2773 		 * If all bufs are using the same wcred, then use that with
   2774 		 * one call for all of them, otherwise commit each one
   2775 		 * separately.
   2776 		 */
   2777 		if (wcred != NOCRED)
   2778 			retv = ncl_commit(vp, off, (int)(endoff - off),
   2779 					  wcred, td);
   2780 		else {
   2781 			retv = 0;
   2782 			for (i = 0; i < bvecpos; i++) {
   2783 				off_t off, size;
   2784 				bp = bvec[i];
   2785 				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
   2786 					bp->b_dirtyoff;
   2787 				size = (u_quad_t)(bp->b_dirtyend
   2788 						  - bp->b_dirtyoff);
   2789 				retv = ncl_commit(vp, off, (int)size,
   2790 						  bp->b_wcred, td);
   2791 				if (retv) break;
   2792 			}
   2793 		}
   2794 
   2795 		if (retv == NFSERR_STALEWRITEVERF)
   2796 			ncl_clearcommit(vp->v_mount);
   2797 
   2798 		/*
   2799 		 * Now, either mark the blocks I/O done or mark the
   2800 		 * blocks dirty, depending on whether the commit
   2801 		 * succeeded.
   2802 		 */
   2803 		for (i = 0; i < bvecpos; i++) {
   2804 			bp = bvec[i];
   2805 			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
   2806 			if (retv) {
   2807 				/*
   2808 				 * Error, leave B_DELWRI intact
   2809 				 */
   2810 				vfs_unbusy_pages(bp);
   2811 				brelse(bp);
   2812 			} else {
   2813 				/*
   2814 				 * Success, remove B_DELWRI ( bundirty() ).
   2815 				 *
   2816 				 * b_dirtyoff/b_dirtyend seem to be NFS
   2817 				 * specific.  We should probably move that
   2818 				 * into bundirty(). XXX
   2819 				 */
   2820 				bufobj_wref(bo);
   2821 				bp->b_flags |= B_ASYNC;
   2822 				bundirty(bp);
   2823 				bp->b_flags &= ~B_DONE;
   2824 				bp->b_ioflags &= ~BIO_ERROR;
   2825 				bp->b_dirtyoff = bp->b_dirtyend = 0;
   2826 				bufdone(bp);
   2827 			}
   2828 		}
   2829 	}
   2830 
   2831 	/*
   2832 	 * Start/do any write(s) that are required.
   2833 	 */
   2834 loop:
   2835 	BO_LOCK(bo);
   2836 	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
   2837 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
   2838 			if (waitfor != MNT_WAIT || passone)
   2839 				continue;
   2840 
   2841 			error = BUF_TIMELOCK(bp,
   2842 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
   2843 			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
   2844 			if (error == 0) {
   2845 				BUF_UNLOCK(bp);
   2846 				goto loop;
   2847 			}
   2848 			if (error == ENOLCK) {
   2849 				error = 0;
   2850 				goto loop;
   2851 			}
   2852 			if (called_from_renewthread != 0) {
   2853 				/*
   2854 				 * Return EIO so the flush will be retried
   2855 				 * later.
   2856 				 */
   2857 				error = EIO;
   2858 				goto done;
   2859 			}
   2860 			if (newnfs_sigintr(nmp, td)) {
   2861 				error = EINTR;
   2862 				goto done;
   2863 			}
   2864 			if (slpflag == PCATCH) {
   2865 				slpflag = 0;
   2866 				slptimeo = 2 * hz;
   2867 			}
   2868 			goto loop;
   2869 		}
   2870 		if ((bp->b_flags & B_DELWRI) == 0)
   2871 			panic("nfs_fsync: not dirty");
   2872 		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
   2873 			BUF_UNLOCK(bp);
   2874 			continue;
   2875 		}
   2876 		BO_UNLOCK(bo);
   2877 		bremfree(bp);
   2878 		if (passone || !commit)
   2879 		    bp->b_flags |= B_ASYNC;
   2880 		else
   2881 		    bp->b_flags |= B_ASYNC;
   2882 		bwrite(bp);
   2883 		if (newnfs_sigintr(nmp, td)) {
   2884 			error = EINTR;
   2885 			goto done;
   2886 		}
   2887 		goto loop;
   2888 	}
   2889 	if (passone) {
   2890 		passone = 0;
   2891 		BO_UNLOCK(bo);
   2892 		goto again;
   2893 	}
   2894 	if (waitfor == MNT_WAIT) {
   2895 		while (bo->bo_numoutput) {
   2896 			error = bufobj_wwait(bo, slpflag, slptimeo);
   2897 			if (error) {
   2898 			    BO_UNLOCK(bo);
   2899 			    if (called_from_renewthread != 0) {
   2900 				/*
   2901 				 * Return EIO so that the flush will be
   2902 				 * retried later.
   2903 				 */
   2904 				error = EIO;
   2905 				goto done;
   2906 			    }
   2907 			    error = newnfs_sigintr(nmp, td);
   2908 			    if (error)
   2909 				goto done;
   2910 			    if (slpflag == PCATCH) {
   2911 				slpflag = 0;
   2912 				slptimeo = 2 * hz;
   2913 			    }
   2914 			    BO_LOCK(bo);
   2915 			}
   2916 		}
   2917 		if (bo->bo_dirty.bv_cnt != 0 && commit) {
   2918 			BO_UNLOCK(bo);
   2919 			goto loop;
   2920 		}
   2921 		/*
   2922 		 * Wait for all the async IO requests to drain
   2923 		 */
   2924 		BO_UNLOCK(bo);
   2925 		mtx_lock(&np->n_mtx);
   2926 		while (np->n_directio_asyncwr > 0) {
   2927 			np->n_flag |= NFSYNCWAIT;
   2928 			error = newnfs_msleep(td, &np->n_directio_asyncwr,
   2929 			    &np->n_mtx, slpflag | (PRIBIO + 1),
   2930 			    "nfsfsync", 0);
   2931 			if (error) {
   2932 				if (newnfs_sigintr(nmp, td)) {
   2933 					mtx_unlock(&np->n_mtx);
   2934 					error = EINTR;
   2935 					goto done;
   2936 				}
   2937 			}
   2938 		}
   2939 		mtx_unlock(&np->n_mtx);
   2940 	} else
   2941 		BO_UNLOCK(bo);
   2942 	if (NFSHASPNFS(nmp)) {
   2943 		nfscl_layoutcommit(vp, td);
   2944 		/*
   2945 		 * Invalidate the attribute cache, since writes to a DS
   2946 		 * won't update the size attribute.
   2947 		 */
   2948 		mtx_lock(&np->n_mtx);
   2949 		np->n_attrstamp = 0;
   2950 	} else
   2951 		mtx_lock(&np->n_mtx);
   2952 	if (np->n_flag & NWRITEERR) {
   2953 		error = np->n_error;
   2954 		np->n_flag &= ~NWRITEERR;
   2955 	}
   2956   	if (commit && bo->bo_dirty.bv_cnt == 0 &&
   2957 	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
   2958   		np->n_flag &= ~NMODIFIED;
   2959 	mtx_unlock(&np->n_mtx);
   2960 done:
   2961 	if (bvec != NULL && bvec != bvec_on_stack)
   2962 		free(bvec, M_TEMP);
   2963 	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
   2964 	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
   2965 	     np->n_directio_asyncwr != 0) && trycnt++ < 5) {
   2966 		/* try, try again... */
   2967 		passone = 1;
   2968 		wcred = NULL;
   2969 		bvec = NULL;
   2970 		bvecsize = 0;
   2971 printf("try%d\n", trycnt);
   2972 		goto again;
   2973 	}
   2974 	return (error);
   2975 }
   2976 
   2977 /*
   2978  * NFS advisory byte-level locks.
   2979  */
   2980 static int
   2981 nfs_advlock(struct vop_advlock_args *ap)
   2982 {
   2983 	struct vnode *vp = ap->a_vp;
   2984 	struct ucred *cred;
   2985 	struct nfsnode *np = VTONFS(ap->a_vp);
   2986 	struct proc *p = (struct proc *)ap->a_id;
   2987 	struct thread *td = curthread;	/* XXX */
   2988 	struct vattr va;
   2989 	int ret, error = EOPNOTSUPP;
   2990 	u_quad_t size;
   2991 
   2992 	if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
   2993 		if (vp->v_type != VREG)
   2994 			return (EINVAL);
   2995 		if ((ap->a_flags & F_POSIX) != 0)
   2996 			cred = p->p_ucred;
   2997 		else
   2998 			cred = td->td_ucred;
   2999 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
   3000 		if (vp->v_iflag & VI_DOOMED) {
   3001 			NFSVOPUNLOCK(vp, 0);
   3002 			return (EBADF);
   3003 		}
   3004 
   3005 		/*
   3006 		 * If this is unlocking a write locked region, flush and
   3007 		 * commit them before unlocking. This is required by
   3008 		 * RFC3530 Sec. 9.3.2.
   3009 		 */
   3010 		if (ap->a_op == F_UNLCK &&
   3011 		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
   3012 		    ap->a_flags))
   3013 			(void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
   3014 
   3015 		/*
   3016 		 * Loop around doing the lock op, while a blocking lock
   3017 		 * must wait for the lock op to succeed.
   3018 		 */
   3019 		do {
   3020 			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
   3021 			    ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
   3022 			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
   3023 			    ap->a_op == F_SETLK) {
   3024 				NFSVOPUNLOCK(vp, 0);
   3025 				error = nfs_catnap(PZERO | PCATCH, ret,
   3026 				    "ncladvl");
   3027 				if (error)
   3028 					return (EINTR);
   3029 				NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
   3030 				if (vp->v_iflag & VI_DOOMED) {
   3031 					NFSVOPUNLOCK(vp, 0);
   3032 					return (EBADF);
   3033 				}
   3034 			}
   3035 		} while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
   3036 		     ap->a_op == F_SETLK);
   3037 		if (ret == NFSERR_DENIED) {
   3038 			NFSVOPUNLOCK(vp, 0);
   3039 			return (EAGAIN);
   3040 		} else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
   3041 			NFSVOPUNLOCK(vp, 0);
   3042 			return (ret);
   3043 		} else if (ret != 0) {
   3044 			NFSVOPUNLOCK(vp, 0);
   3045 			return (EACCES);
   3046 		}
   3047 
   3048 		/*
   3049 		 * Now, if we just got a lock, invalidate data in the buffer
   3050 		 * cache, as required, so that the coherency conforms with
   3051 		 * RFC3530 Sec. 9.3.2.
   3052 		 */
   3053 		if (ap->a_op == F_SETLK) {
   3054 			if ((np->n_flag & NMODIFIED) == 0) {
   3055 				np->n_attrstamp = 0;
   3056 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
   3057 				ret = VOP_GETATTR(vp, &va, cred);
   3058 			}
   3059 			if ((np->n_flag & NMODIFIED) || ret ||
   3060 			    np->n_change != va.va_filerev) {
   3061 				(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
   3062 				np->n_attrstamp = 0;
   3063 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
   3064 				ret = VOP_GETATTR(vp, &va, cred);
   3065 				if (!ret) {
   3066 					np->n_mtime = va.va_mtime;
   3067 					np->n_change = va.va_filerev;
   3068 				}
   3069 			}
   3070 			/* Mark that a file lock has been acquired. */
   3071 			mtx_lock(&np->n_mtx);
   3072 			np->n_flag |= NHASBEENLOCKED;
   3073 			mtx_unlock(&np->n_mtx);
   3074 		}
   3075 		NFSVOPUNLOCK(vp, 0);
   3076 		return (0);
   3077 	} else if (!NFS_ISV4(vp)) {
   3078 		error = NFSVOPLOCK(vp, LK_SHARED);
   3079 		if (error)
   3080 			return (error);
   3081 		if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
   3082 			size = VTONFS(vp)->n_size;
   3083 			NFSVOPUNLOCK(vp, 0);
   3084 			error = lf_advlock(ap, &(vp->v_lockf), size);
   3085 		} else {
   3086 			if (nfs_advlock_p != NULL)
   3087 				error = nfs_advlock_p(ap);
   3088 			else {
   3089 				NFSVOPUNLOCK(vp, 0);
   3090 				error = ENOLCK;
   3091 			}
   3092 		}
   3093 		if (error == 0 && ap->a_op == F_SETLK) {
   3094 			error = NFSVOPLOCK(vp, LK_SHARED);
   3095 			if (error == 0) {
   3096 				/* Mark that a file lock has been acquired. */
   3097 				mtx_lock(&np->n_mtx);
   3098 				np->n_flag |= NHASBEENLOCKED;
   3099 				mtx_unlock(&np->n_mtx);
   3100 				NFSVOPUNLOCK(vp, 0);
   3101 			}
   3102 		}
   3103 	}
   3104 	return (error);
   3105 }
   3106 
   3107 /*
   3108  * NFS advisory byte-level locks.
   3109  */
   3110 static int
   3111 nfs_advlockasync(struct vop_advlockasync_args *ap)
   3112 {
   3113 	struct vnode *vp = ap->a_vp;
   3114 	u_quad_t size;
   3115 	int error;
   3116 
   3117 	if (NFS_ISV4(vp))
   3118 		return (EOPNOTSUPP);
   3119 	error = NFSVOPLOCK(vp, LK_SHARED);
   3120 	if (error)
   3121 		return (error);
   3122 	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
   3123 		size = VTONFS(vp)->n_size;
   3124 		NFSVOPUNLOCK(vp, 0);
   3125 		error = lf_advlockasync(ap, &(vp->v_lockf), size);
   3126 	} else {
   3127 		NFSVOPUNLOCK(vp, 0);
   3128 		error = EOPNOTSUPP;
   3129 	}
   3130 	return (error);
   3131 }
   3132 
   3133 /*
   3134  * Print out the contents of an nfsnode.
   3135  */
   3136 static int
   3137 nfs_print(struct vop_print_args *ap)
   3138 {
   3139 	struct vnode *vp = ap->a_vp;
   3140 	struct nfsnode *np = VTONFS(vp);
   3141 
   3142 	printf("\tfileid %ld fsid 0x%x", np->n_vattr.na_fileid,
   3143 	    np->n_vattr.na_fsid);
   3144 	if (vp->v_type == VFIFO)
   3145 		fifo_printinfo(vp);
   3146 	printf("\n");
   3147 	return (0);
   3148 }
   3149 
   3150 /*
   3151  * This is the "real" nfs::bwrite(struct buf*).
   3152  * We set B_CACHE if this is a VMIO buffer.
   3153  */
   3154 int
   3155 ncl_writebp(struct buf *bp, int force __unused, struct thread *td)
   3156 {
   3157 	int s;
   3158 	int oldflags = bp->b_flags;
   3159 #if 0
   3160 	int retv = 1;
   3161 	off_t off;
   3162 #endif
   3163 
   3164 	BUF_ASSERT_HELD(bp);
   3165 
   3166 	if (bp->b_flags & B_INVAL) {
   3167 		brelse(bp);
   3168 		return(0);
   3169 	}
   3170 
   3171 	bp->b_flags |= B_CACHE;
   3172 
   3173 	/*
   3174 	 * Undirty the bp.  We will redirty it later if the I/O fails.
   3175 	 */
   3176 
   3177 	s = splbio();
   3178 	bundirty(bp);
   3179 	bp->b_flags &= ~B_DONE;
   3180 	bp->b_ioflags &= ~BIO_ERROR;
   3181 	bp->b_iocmd = BIO_WRITE;
   3182 
   3183 	bufobj_wref(bp->b_bufobj);
   3184 	curthread->td_ru.ru_oublock++;
   3185 	splx(s);
   3186 
   3187 	/*
   3188 	 * Note: to avoid loopback deadlocks, we do not
   3189 	 * assign b_runningbufspace.
   3190 	 */
   3191 	vfs_busy_pages(bp, 1);
   3192 
   3193 	BUF_KERNPROC(bp);
   3194 	bp->b_iooffset = dbtob(bp->b_blkno);
   3195 	bstrategy(bp);
   3196 
   3197 	if( (oldflags & B_ASYNC) == 0) {
   3198 		int rtval = bufwait(bp);
   3199 
   3200 		if (oldflags & B_DELWRI) {
   3201 			s = splbio();
   3202 			reassignbuf(bp);
   3203 			splx(s);
   3204 		}
   3205 		brelse(bp);
   3206 		return (rtval);
   3207 	}
   3208 
   3209 	return (0);
   3210 }
   3211 
   3212 /*
   3213  * nfs special file access vnode op.
   3214  * Essentially just get vattr and then imitate iaccess() since the device is
   3215  * local to the client.
   3216  */
   3217 static int
   3218 nfsspec_access(struct vop_access_args *ap)
   3219 {
   3220 	struct vattr *vap;
   3221 	struct ucred *cred = ap->a_cred;
   3222 	struct vnode *vp = ap->a_vp;
   3223 	accmode_t accmode = ap->a_accmode;
   3224 	struct vattr vattr;
   3225 	int error;
   3226 
   3227 	/*
   3228 	 * Disallow write attempts on filesystems mounted read-only;
   3229 	 * unless the file is a socket, fifo, or a block or character
   3230 	 * device resident on the filesystem.
   3231 	 */
   3232 	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
   3233 		switch (vp->v_type) {
   3234 		case VREG:
   3235 		case VDIR:
   3236 		case VLNK:
   3237 			return (EROFS);
   3238 		default:
   3239 			break;
   3240 		}
   3241 	}
   3242 	vap = &vattr;
   3243 	error = VOP_GETATTR(vp, vap, cred);
   3244 	if (error)
   3245 		goto out;
   3246 	error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
   3247 	    accmode, cred, NULL);
   3248 out:
   3249 	return error;
   3250 }
   3251 
   3252 /*
   3253  * Read wrapper for fifos.
   3254  */
   3255 static int
   3256 nfsfifo_read(struct vop_read_args *ap)
   3257 {
   3258 	struct nfsnode *np = VTONFS(ap->a_vp);
   3259 	int error;
   3260 
   3261 	/*
   3262 	 * Set access flag.
   3263 	 */
   3264 	mtx_lock(&np->n_mtx);
   3265 	np->n_flag |= NACC;
   3266 	vfs_timestamp(&np->n_atim);
   3267 	mtx_unlock(&np->n_mtx);
   3268 	error = fifo_specops.vop_read(ap);
   3269 	return error;
   3270 }
   3271 
   3272 /*
   3273  * Write wrapper for fifos.
   3274  */
   3275 static int
   3276 nfsfifo_write(struct vop_write_args *ap)
   3277 {
   3278 	struct nfsnode *np = VTONFS(ap->a_vp);
   3279 
   3280 	/*
   3281 	 * Set update flag.
   3282 	 */
   3283 	mtx_lock(&np->n_mtx);
   3284 	np->n_flag |= NUPD;
   3285 	vfs_timestamp(&np->n_mtim);
   3286 	mtx_unlock(&np->n_mtx);
   3287 	return(fifo_specops.vop_write(ap));
   3288 }
   3289 
   3290 /*
   3291  * Close wrapper for fifos.
   3292  *
   3293  * Update the times on the nfsnode then do fifo close.
   3294  */
   3295 static int
   3296 nfsfifo_close(struct vop_close_args *ap)
   3297 {
   3298 	struct vnode *vp = ap->a_vp;
   3299 	struct nfsnode *np = VTONFS(vp);
   3300 	struct vattr vattr;
   3301 	struct timespec ts;
   3302 
   3303 	mtx_lock(&np->n_mtx);
   3304 	if (np->n_flag & (NACC | NUPD)) {
   3305 		vfs_timestamp(&ts);
   3306 		if (np->n_flag & NACC)
   3307 			np->n_atim = ts;
   3308 		if (np->n_flag & NUPD)
   3309 			np->n_mtim = ts;
   3310 		np->n_flag |= NCHG;
   3311 		if (vrefcnt(vp) == 1 &&
   3312 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
   3313 			VATTR_NULL(&vattr);
   3314 			if (np->n_flag & NACC)
   3315 				vattr.va_atime = np->n_atim;
   3316 			if (np->n_flag & NUPD)
   3317 				vattr.va_mtime = np->n_mtim;
   3318 			mtx_unlock(&np->n_mtx);
   3319 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
   3320 			goto out;
   3321 		}
   3322 	}
   3323 	mtx_unlock(&np->n_mtx);
   3324 out:
   3325 	return (fifo_specops.vop_close(ap));
   3326 }
   3327 
   3328 /*
   3329  * Just call ncl_writebp() with the force argument set to 1.
   3330  *
   3331  * NOTE: B_DONE may or may not be set in a_bp on call.
   3332  */
   3333 static int
   3334 nfs_bwrite(struct buf *bp)
   3335 {
   3336 
   3337 	return (ncl_writebp(bp, 1, curthread));
   3338 }
   3339 
   3340 struct buf_ops buf_ops_newnfs = {
   3341 	.bop_name	=	"buf_ops_nfs",
   3342 	.bop_write	=	nfs_bwrite,
   3343 	.bop_strategy	=	bufstrategy,
   3344 	.bop_sync	=	bufsync,
   3345 	.bop_bdflush	=	bufbdflush,
   3346 };
   3347 
   3348 static int
   3349 nfs_getacl(struct vop_getacl_args *ap)
   3350 {
   3351 	int error;
   3352 
   3353 	if (ap->a_type != ACL_TYPE_NFS4)
   3354 		return (EOPNOTSUPP);
   3355 	error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
   3356 	    NULL);
   3357 	if (error > NFSERR_STALE) {
   3358 		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
   3359 		error = EPERM;
   3360 	}
   3361 	return (error);
   3362 }
   3363 
   3364 static int
   3365 nfs_setacl(struct vop_setacl_args *ap)
   3366 {
   3367 	int error;
   3368 
   3369 	if (ap->a_type != ACL_TYPE_NFS4)
   3370 		return (EOPNOTSUPP);
   3371 	error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
   3372 	    NULL);
   3373 	if (error > NFSERR_STALE) {
   3374 		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
   3375 		error = EPERM;
   3376 	}
   3377 	return (error);
   3378 }
   3379 
   3380 /*
   3381  * Return POSIX pathconf information applicable to nfs filesystems.
   3382  */
   3383 static int
   3384 nfs_pathconf(struct vop_pathconf_args *ap)
   3385 {
   3386 	struct nfsv3_pathconf pc;
   3387 	struct nfsvattr nfsva;
   3388 	struct vnode *vp = ap->a_vp;
   3389 	struct thread *td = curthread;
   3390 	int attrflag, error;
   3391 
   3392 	if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
   3393 	    ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
   3394 	    ap->a_name == _PC_NO_TRUNC)) ||
   3395 	    (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
   3396 		/*
   3397 		 * Since only the above 4 a_names are returned by the NFSv3
   3398 		 * Pathconf RPC, there is no point in doing it for others.
   3399 		 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
   3400 		 * be used for _PC_NFS4_ACL as well.
   3401 		 */
   3402 		error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
   3403 		    &attrflag, NULL);
   3404 		if (attrflag != 0)
   3405 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
   3406 			    1);
   3407 		if (error != 0)
   3408 			return (error);
   3409 	} else {
   3410 		/*
   3411 		 * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
   3412 		 * just fake them.
   3413 		 */
   3414 		pc.pc_linkmax = LINK_MAX;
   3415 		pc.pc_namemax = NFS_MAXNAMLEN;
   3416 		pc.pc_notrunc = 1;
   3417 		pc.pc_chownrestricted = 1;
   3418 		pc.pc_caseinsensitive = 0;
   3419 		pc.pc_casepreserving = 1;
   3420 		error = 0;
   3421 	}
   3422 	switch (ap->a_name) {
   3423 	case _PC_LINK_MAX:
   3424 		*ap->a_retval = pc.pc_linkmax;
   3425 		break;
   3426 	case _PC_NAME_MAX:
   3427 		*ap->a_retval = pc.pc_namemax;
   3428 		break;
   3429 	case _PC_PATH_MAX:
   3430 		*ap->a_retval = PATH_MAX;
   3431 		break;
   3432 	case _PC_PIPE_BUF:
   3433 		*ap->a_retval = PIPE_BUF;
   3434 		break;
   3435 	case _PC_CHOWN_RESTRICTED:
   3436 		*ap->a_retval = pc.pc_chownrestricted;
   3437 		break;
   3438 	case _PC_NO_TRUNC:
   3439 		*ap->a_retval = pc.pc_notrunc;
   3440 		break;
   3441 	case _PC_ACL_EXTENDED:
   3442 		*ap->a_retval = 0;
   3443 		break;
   3444 	case _PC_ACL_NFS4:
   3445 		if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 &&
   3446 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL))
   3447 			*ap->a_retval = 1;
   3448 		else
   3449 			*ap->a_retval = 0;
   3450 		break;
   3451 	case _PC_ACL_PATH_MAX:
   3452 		if (NFS_ISV4(vp))
   3453 			*ap->a_retval = ACL_MAX_ENTRIES;
   3454 		else
   3455 			*ap->a_retval = 3;
   3456 		break;
   3457 	case _PC_MAC_PRESENT:
   3458 		*ap->a_retval = 0;
   3459 		break;
   3460 	case _PC_ASYNC_IO:
   3461 		/* _PC_ASYNC_IO should have been handled by upper layers. */
   3462 		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
   3463 		error = EINVAL;
   3464 		break;
   3465 	case _PC_PRIO_IO:
   3466 		*ap->a_retval = 0;
   3467 		break;
   3468 	case _PC_SYNC_IO:
   3469 		*ap->a_retval = 0;
   3470 		break;
   3471 	case _PC_ALLOC_SIZE_MIN:
   3472 		*ap->a_retval = vp->v_mount->mnt_stat.f_bsize;
   3473 		break;
   3474 	case _PC_FILESIZEBITS:
   3475 		if (NFS_ISV34(vp))
   3476 			*ap->a_retval = 64;
   3477 		else
   3478 			*ap->a_retval = 32;
   3479 		break;
   3480 	case _PC_REC_INCR_XFER_SIZE:
   3481 		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
   3482 		break;
   3483 	case _PC_REC_MAX_XFER_SIZE:
   3484 		*ap->a_retval = -1; /* means ``unlimited'' */
   3485 		break;
   3486 	case _PC_REC_MIN_XFER_SIZE:
   3487 		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
   3488 		break;
   3489 	case _PC_REC_XFER_ALIGN:
   3490 		*ap->a_retval = PAGE_SIZE;
   3491 		break;
   3492 	case _PC_SYMLINK_MAX:
   3493 		*ap->a_retval = NFS_MAXPATHLEN;
   3494 		break;
   3495 
   3496 	default:
   3497 		error = EINVAL;
   3498 		break;
   3499 	}
   3500 	return (error);
   3501 }
   3502 
   3503