Home | History | Annotate | Line # | Download | only in nfs
nfs_syscalls.c revision 1.148
      1 /*	$NetBSD: nfs_syscalls.c,v 1.148 2009/05/23 14:44:56 ad Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Rick Macklem at The University of Guelph.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. Neither the name of the University nor the names of its contributors
     19  *    may be used to endorse or promote products derived from this software
     20  *    without specific prior written permission.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32  * SUCH DAMAGE.
     33  *
     34  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __KERNEL_RCSID(0, "$NetBSD: nfs_syscalls.c,v 1.148 2009/05/23 14:44:56 ad Exp $");
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/kernel.h>
     43 #include <sys/file.h>
     44 #include <sys/stat.h>
     45 #include <sys/vnode.h>
     46 #include <sys/mount.h>
     47 #include <sys/proc.h>
     48 #include <sys/uio.h>
     49 #include <sys/malloc.h>
     50 #include <sys/kmem.h>
     51 #include <sys/buf.h>
     52 #include <sys/mbuf.h>
     53 #include <sys/socket.h>
     54 #include <sys/socketvar.h>
     55 #include <sys/signalvar.h>
     56 #include <sys/domain.h>
     57 #include <sys/protosw.h>
     58 #include <sys/namei.h>
     59 #include <sys/syslog.h>
     60 #include <sys/filedesc.h>
     61 #include <sys/kthread.h>
     62 #include <sys/kauth.h>
     63 #include <sys/syscallargs.h>
     64 
     65 #include <netinet/in.h>
     66 #include <netinet/tcp.h>
     67 #include <nfs/xdr_subs.h>
     68 #include <nfs/rpcv2.h>
     69 #include <nfs/nfsproto.h>
     70 #include <nfs/nfs.h>
     71 #include <nfs/nfsm_subs.h>
     72 #include <nfs/nfsrvcache.h>
     73 #include <nfs/nfsmount.h>
     74 #include <nfs/nfsnode.h>
     75 #include <nfs/nfsrtt.h>
     76 #include <nfs/nfs_var.h>
     77 
     78 extern int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *,
     79 						struct nfssvc_sock *,
     80 						struct lwp *, struct mbuf **);
     81 extern int nfsrvw_procrastinate;
     82 extern int nuidhash_max;
     83 
     84 static int nfs_numnfsd = 0;
     85 static struct nfsdrt nfsdrt;
     86 kmutex_t nfsd_lock;
     87 struct nfssvc_sockhead nfssvc_sockhead;
     88 kcondvar_t nfsd_initcv;
     89 struct nfssvc_sockhead nfssvc_sockpending;
     90 struct nfsdhead nfsd_head;
     91 struct nfsdidlehead nfsd_idle_head;
     92 
     93 int nfssvc_sockhead_flag;
     94 int nfsd_head_flag;
     95 
     96 struct nfssvc_sock *nfs_udpsock;
     97 struct nfssvc_sock *nfs_udp6sock;
     98 
     99 static struct nfssvc_sock *nfsrv_sockalloc(void);
    100 static void nfsrv_sockfree(struct nfssvc_sock *);
    101 static void nfsd_rt(int, struct nfsrv_descript *, int);
    102 
    103 /*
    104  * NFS server system calls
    105  */
    106 
    107 
    108 /*
    109  * Nfs server pseudo system call for the nfsd's
    110  * Based on the flag value it either:
    111  * - adds a socket to the selection list
    112  * - remains in the kernel as an nfsd
    113  * - remains in the kernel as an nfsiod
    114  */
    115 int
    116 sys_nfssvc(struct lwp *l, const struct sys_nfssvc_args *uap, register_t *retval)
    117 {
    118 	/* {
    119 		syscallarg(int) flag;
    120 		syscallarg(void *) argp;
    121 	} */
    122 	int error;
    123 	file_t *fp;
    124 	struct mbuf *nam;
    125 	struct nfsd_args nfsdarg;
    126 	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
    127 	struct nfsd *nfsd;
    128 	struct nfssvc_sock *slp;
    129 	struct nfsuid *nuidp;
    130 
    131 	/*
    132 	 * Must be super user
    133 	 */
    134 	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_NFS,
    135 	    KAUTH_REQ_NETWORK_NFS_SVC, NULL, NULL, NULL);
    136 	if (error)
    137 		return (error);
    138 
    139 	mutex_enter(&nfsd_lock);
    140 	while (nfssvc_sockhead_flag & SLP_INIT) {
    141 		cv_wait(&nfsd_initcv, &nfsd_lock);
    142 	}
    143 	mutex_exit(&nfsd_lock);
    144 
    145 	if (SCARG(uap, flag) & NFSSVC_BIOD) {
    146 		/* Dummy implementation of nfsios for 1.4 and earlier. */
    147 		error = kpause("nfsbiod", true, 0, NULL);
    148 	} else if (SCARG(uap, flag) & NFSSVC_MNTD) {
    149 		error = ENOSYS;
    150 	} else if (SCARG(uap, flag) & NFSSVC_ADDSOCK) {
    151 		error = copyin(SCARG(uap, argp), (void *)&nfsdarg,
    152 		    sizeof(nfsdarg));
    153 		if (error)
    154 			return (error);
    155 		/* getsock() will use the descriptor for us */
    156 		if ((fp = fd_getfile(nfsdarg.sock)) == NULL)
    157 			return (EBADF);
    158 		if (fp->f_type != DTYPE_SOCKET) {
    159 			fd_putfile(nfsdarg.sock);
    160 			return (ENOTSOCK);
    161 		}
    162 		/*
    163 		 * Get the client address for connected sockets.
    164 		 */
    165 		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
    166 			nam = (struct mbuf *)0;
    167 		else {
    168 			error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
    169 				MT_SONAME);
    170 			if (error) {
    171 				fd_putfile(nfsdarg.sock);
    172 				return (error);
    173 			}
    174 		}
    175 		error = nfssvc_addsock(fp, nam);
    176 		fd_putfile(nfsdarg.sock);
    177 	} else if (SCARG(uap, flag) & NFSSVC_SETEXPORTSLIST) {
    178 		struct export_args *args;
    179 		struct mountd_exports_list mel;
    180 
    181 		error = copyin(SCARG(uap, argp), &mel, sizeof(mel));
    182 		if (error != 0)
    183 			return error;
    184 
    185 		args = (struct export_args *)malloc(mel.mel_nexports *
    186 		    sizeof(struct export_args), M_TEMP, M_WAITOK);
    187 		error = copyin(mel.mel_exports, args, mel.mel_nexports *
    188 		    sizeof(struct export_args));
    189 		if (error != 0) {
    190 			free(args, M_TEMP);
    191 			return error;
    192 		}
    193 		mel.mel_exports = args;
    194 
    195 		error = mountd_set_exports_list(&mel, l);
    196 
    197 		free(args, M_TEMP);
    198 	} else {
    199 		error = copyin(SCARG(uap, argp), (void *)nsd, sizeof (*nsd));
    200 		if (error)
    201 			return (error);
    202 		if ((SCARG(uap, flag) & NFSSVC_AUTHIN) &&
    203 		    ((nfsd = nsd->nsd_nfsd)) != NULL &&
    204 		    (nfsd->nfsd_slp->ns_flags & SLP_VALID)) {
    205 			slp = nfsd->nfsd_slp;
    206 
    207 			/*
    208 			 * First check to see if another nfsd has already
    209 			 * added this credential.
    210 			 */
    211 			LIST_FOREACH(nuidp, NUIDHASH(slp, nsd->nsd_cr.cr_uid),
    212 			    nu_hash) {
    213 				if (kauth_cred_geteuid(nuidp->nu_cr) ==
    214 				    nsd->nsd_cr.cr_uid &&
    215 				    (!nfsd->nfsd_nd->nd_nam2 ||
    216 				     netaddr_match(NU_NETFAM(nuidp),
    217 				     &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
    218 					break;
    219 			}
    220 			if (nuidp) {
    221 			    kauth_cred_hold(nuidp->nu_cr);
    222 			    nfsd->nfsd_nd->nd_cr = nuidp->nu_cr;
    223 			    nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
    224 			} else {
    225 			    /*
    226 			     * Nope, so we will.
    227 			     */
    228 			    if (slp->ns_numuids < nuidhash_max) {
    229 				slp->ns_numuids++;
    230 				nuidp = kmem_alloc(sizeof(*nuidp), KM_SLEEP);
    231 			    } else
    232 				nuidp = (struct nfsuid *)0;
    233 			    if ((slp->ns_flags & SLP_VALID) == 0) {
    234 				if (nuidp)
    235 				    kmem_free(nuidp, sizeof(*nuidp));
    236 			    } else {
    237 				if (nuidp == (struct nfsuid *)0) {
    238 				    nuidp = TAILQ_FIRST(&slp->ns_uidlruhead);
    239 				    LIST_REMOVE(nuidp, nu_hash);
    240 				    TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
    241 					nu_lru);
    242 				    if (nuidp->nu_flag & NU_NAM)
    243 					m_freem(nuidp->nu_nam);
    244 			        }
    245 				nuidp->nu_flag = 0;
    246 				kauth_uucred_to_cred(nuidp->nu_cr,
    247 				    &nsd->nsd_cr);
    248 				nuidp->nu_timestamp = nsd->nsd_timestamp;
    249 				nuidp->nu_expire = time_second + nsd->nsd_ttl;
    250 				/*
    251 				 * and save the session key in nu_key.
    252 				 */
    253 				memcpy(nuidp->nu_key, nsd->nsd_key,
    254 				    sizeof(nsd->nsd_key));
    255 				if (nfsd->nfsd_nd->nd_nam2) {
    256 				    struct sockaddr_in *saddr;
    257 
    258 				    saddr = mtod(nfsd->nfsd_nd->nd_nam2,
    259 					 struct sockaddr_in *);
    260 				    switch (saddr->sin_family) {
    261 				    case AF_INET:
    262 					nuidp->nu_flag |= NU_INETADDR;
    263 					nuidp->nu_inetaddr =
    264 					     saddr->sin_addr.s_addr;
    265 					break;
    266 				    case AF_INET6:
    267 					nuidp->nu_flag |= NU_NAM;
    268 					nuidp->nu_nam = m_copym(
    269 					    nfsd->nfsd_nd->nd_nam2, 0,
    270 					     M_COPYALL, M_WAIT);
    271 					break;
    272 				    default:
    273 					return EAFNOSUPPORT;
    274 				    };
    275 				}
    276 				TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
    277 					nu_lru);
    278 				LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
    279 					nuidp, nu_hash);
    280 				kauth_cred_hold(nuidp->nu_cr);
    281 				nfsd->nfsd_nd->nd_cr = nuidp->nu_cr;
    282 				nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
    283 			    }
    284 			}
    285 		}
    286 		if ((SCARG(uap, flag) & NFSSVC_AUTHINFAIL) &&
    287 		    (nfsd = nsd->nsd_nfsd))
    288 			nfsd->nfsd_flag |= NFSD_AUTHFAIL;
    289 		error = nfssvc_nfsd(nsd, SCARG(uap, argp), l);
    290 	}
    291 	if (error == EINTR || error == ERESTART)
    292 		error = 0;
    293 	return (error);
    294 }
    295 
    296 static struct nfssvc_sock *
    297 nfsrv_sockalloc(void)
    298 {
    299 	struct nfssvc_sock *slp;
    300 
    301 	slp = kmem_alloc(sizeof(*slp), KM_SLEEP);
    302 	memset(slp, 0, sizeof (struct nfssvc_sock));
    303 	mutex_init(&slp->ns_lock, MUTEX_DRIVER, IPL_SOFTNET);
    304 	mutex_init(&slp->ns_alock, MUTEX_DRIVER, IPL_SOFTNET);
    305 	cv_init(&slp->ns_cv, "nfsdsock");
    306 	TAILQ_INIT(&slp->ns_uidlruhead);
    307 	LIST_INIT(&slp->ns_tq);
    308 	SIMPLEQ_INIT(&slp->ns_sendq);
    309 	mutex_enter(&nfsd_lock);
    310 	TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
    311 	mutex_exit(&nfsd_lock);
    312 
    313 	return slp;
    314 }
    315 
    316 static void
    317 nfsrv_sockfree(struct nfssvc_sock *slp)
    318 {
    319 
    320 	KASSERT(slp->ns_so == NULL);
    321 	KASSERT(slp->ns_fp == NULL);
    322 	KASSERT((slp->ns_flags & SLP_VALID) == 0);
    323 	mutex_destroy(&slp->ns_lock);
    324 	mutex_destroy(&slp->ns_alock);
    325 	cv_destroy(&slp->ns_cv);
    326 	kmem_free(slp, sizeof(*slp));
    327 }
    328 
    329 /*
    330  * Adds a socket to the list for servicing by nfsds.
    331  */
    332 int
    333 nfssvc_addsock(file_t *fp, struct mbuf *mynam)
    334 {
    335 	int siz;
    336 	struct nfssvc_sock *slp;
    337 	struct socket *so;
    338 	struct nfssvc_sock *tslp;
    339 	int error;
    340 	int val;
    341 
    342 	so = (struct socket *)fp->f_data;
    343 	tslp = (struct nfssvc_sock *)0;
    344 	/*
    345 	 * Add it to the list, as required.
    346 	 */
    347 	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
    348 		if (so->so_proto->pr_domain->dom_family == AF_INET6)
    349 			tslp = nfs_udp6sock;
    350 		else {
    351 			tslp = nfs_udpsock;
    352 			if (tslp->ns_flags & SLP_VALID) {
    353 				m_freem(mynam);
    354 				return (EPERM);
    355 			}
    356 		}
    357 	}
    358 	if (so->so_type == SOCK_STREAM)
    359 		siz = NFS_MAXPACKET + sizeof (u_long);
    360 	else
    361 		siz = NFS_MAXPACKET;
    362 	solock(so);
    363 	error = soreserve(so, siz, siz);
    364 	sounlock(so);
    365 	if (error) {
    366 		m_freem(mynam);
    367 		return (error);
    368 	}
    369 
    370 	/*
    371 	 * Set protocol specific options { for now TCP only } and
    372 	 * reserve some space. For datagram sockets, this can get called
    373 	 * repeatedly for the same socket, but that isn't harmful.
    374 	 */
    375 	if (so->so_type == SOCK_STREAM) {
    376 		val = 1;
    377 		so_setsockopt(NULL, so, SOL_SOCKET, SO_KEEPALIVE, &val,
    378 		    sizeof(val));
    379 	}
    380 	if ((so->so_proto->pr_domain->dom_family == AF_INET ||
    381 	    so->so_proto->pr_domain->dom_family == AF_INET6) &&
    382 	    so->so_proto->pr_protocol == IPPROTO_TCP) {
    383 		val = 1;
    384 		so_setsockopt(NULL, so, IPPROTO_TCP, TCP_NODELAY, &val,
    385 		    sizeof(val));
    386 	}
    387 	solock(so);
    388 	so->so_rcv.sb_flags &= ~SB_NOINTR;
    389 	so->so_rcv.sb_timeo = 0;
    390 	so->so_snd.sb_flags &= ~SB_NOINTR;
    391 	so->so_snd.sb_timeo = 0;
    392 	sounlock(so);
    393 	if (tslp) {
    394 		slp = tslp;
    395 	} else {
    396 		slp = nfsrv_sockalloc();
    397 	}
    398 	slp->ns_so = so;
    399 	slp->ns_nam = mynam;
    400 	mutex_enter(&fp->f_lock);
    401 	fp->f_count++;
    402 	mutex_exit(&fp->f_lock);
    403 	slp->ns_fp = fp;
    404 	slp->ns_flags = SLP_VALID;
    405 	slp->ns_aflags = SLP_A_NEEDQ;
    406 	slp->ns_gflags = 0;
    407 	slp->ns_sflags = 0;
    408 	solock(so);
    409 	so->so_upcallarg = (void *)slp;
    410 	so->so_upcall = nfsrv_soupcall;
    411 	so->so_rcv.sb_flags |= SB_UPCALL;
    412 	sounlock(so);
    413 	nfsrv_wakenfsd(slp);
    414 	return (0);
    415 }
    416 
    417 /*
    418  * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
    419  * until it is killed by a signal.
    420  */
    421 int
    422 nfssvc_nfsd(struct nfsd_srvargs *nsd, void *argp, struct lwp *l)
    423 {
    424 	struct timeval tv;
    425 	struct mbuf *m;
    426 	struct nfssvc_sock *slp;
    427 	struct nfsd *nfsd = nsd->nsd_nfsd;
    428 	struct nfsrv_descript *nd = NULL;
    429 	struct mbuf *mreq;
    430 	u_quad_t cur_usec;
    431 	int error = 0, cacherep, siz, sotype, writes_todo;
    432 	struct proc *p = l->l_proc;
    433 	bool doreinit;
    434 
    435 #ifndef nolint
    436 	cacherep = RC_DOIT;
    437 	writes_todo = 0;
    438 #endif
    439 	uvm_lwp_hold(l);
    440 	if (nfsd == NULL) {
    441 		nsd->nsd_nfsd = nfsd = kmem_alloc(sizeof(*nfsd), KM_SLEEP);
    442 		memset(nfsd, 0, sizeof (struct nfsd));
    443 		cv_init(&nfsd->nfsd_cv, "nfsd");
    444 		nfsd->nfsd_procp = p;
    445 		mutex_enter(&nfsd_lock);
    446 		while ((nfssvc_sockhead_flag & SLP_INIT) != 0) {
    447 			KASSERT(nfs_numnfsd == 0);
    448 			cv_wait(&nfsd_initcv, &nfsd_lock);
    449 		}
    450 		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
    451 		nfs_numnfsd++;
    452 		mutex_exit(&nfsd_lock);
    453 	}
    454 	/*
    455 	 * Loop getting rpc requests until SIGKILL.
    456 	 */
    457 	for (;;) {
    458 		bool dummy;
    459 
    460 		if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
    461 		    != 0) {
    462 			preempt();
    463 		}
    464 		if (nfsd->nfsd_slp == NULL) {
    465 			mutex_enter(&nfsd_lock);
    466 			while (nfsd->nfsd_slp == NULL &&
    467 			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
    468 				SLIST_INSERT_HEAD(&nfsd_idle_head, nfsd,
    469 				    nfsd_idle);
    470 				error = cv_wait_sig(&nfsd->nfsd_cv, &nfsd_lock);
    471 				if (error) {
    472 					slp = nfsd->nfsd_slp;
    473 					nfsd->nfsd_slp = NULL;
    474 					if (!slp)
    475 						SLIST_REMOVE(&nfsd_idle_head,
    476 						    nfsd, nfsd, nfsd_idle);
    477 					mutex_exit(&nfsd_lock);
    478 					if (slp) {
    479 						nfsrv_wakenfsd(slp);
    480 						nfsrv_slpderef(slp);
    481 					}
    482 					goto done;
    483 				}
    484 			}
    485 			if (nfsd->nfsd_slp == NULL &&
    486 			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
    487 				slp = TAILQ_FIRST(&nfssvc_sockpending);
    488 				if (slp) {
    489 					KASSERT((slp->ns_gflags & SLP_G_DOREC)
    490 					    != 0);
    491 					TAILQ_REMOVE(&nfssvc_sockpending, slp,
    492 					    ns_pending);
    493 					slp->ns_gflags &= ~SLP_G_DOREC;
    494 					slp->ns_sref++;
    495 					nfsd->nfsd_slp = slp;
    496 				} else
    497 					nfsd_head_flag &= ~NFSD_CHECKSLP;
    498 			}
    499 			KASSERT(nfsd->nfsd_slp == NULL ||
    500 			    nfsd->nfsd_slp->ns_sref > 0);
    501 			mutex_exit(&nfsd_lock);
    502 			if ((slp = nfsd->nfsd_slp) == NULL)
    503 				continue;
    504 			if (slp->ns_flags & SLP_VALID) {
    505 				bool more;
    506 
    507 				if (nfsdsock_testbits(slp, SLP_A_NEEDQ)) {
    508 					nfsrv_rcv(slp);
    509 				}
    510 				if (nfsdsock_testbits(slp, SLP_A_DISCONN)) {
    511 					nfsrv_zapsock(slp);
    512 				}
    513 				error = nfsrv_dorec(slp, nfsd, &nd, &more);
    514 				getmicrotime(&tv);
    515 				cur_usec = (u_quad_t)tv.tv_sec * 1000000 +
    516 					(u_quad_t)tv.tv_usec;
    517 				writes_todo = 0;
    518 				if (error) {
    519 					struct nfsrv_descript *nd2;
    520 
    521 					mutex_enter(&nfsd_lock);
    522 					nd2 = LIST_FIRST(&slp->ns_tq);
    523 					if (nd2 != NULL &&
    524 					    nd2->nd_time <= cur_usec) {
    525 						error = 0;
    526 						cacherep = RC_DOIT;
    527 						writes_todo = 1;
    528 					}
    529 					mutex_exit(&nfsd_lock);
    530 				}
    531 				if (error == 0 && more) {
    532 					nfsrv_wakenfsd(slp);
    533 				}
    534 			}
    535 		} else {
    536 			error = 0;
    537 			slp = nfsd->nfsd_slp;
    538 		}
    539 		KASSERT(slp != NULL);
    540 		KASSERT(nfsd->nfsd_slp == slp);
    541 		if (error || (slp->ns_flags & SLP_VALID) == 0) {
    542 			if (nd) {
    543 				nfsdreq_free(nd);
    544 				nd = NULL;
    545 			}
    546 			nfsd->nfsd_slp = NULL;
    547 			nfsrv_slpderef(slp);
    548 			continue;
    549 		}
    550 		sotype = slp->ns_so->so_type;
    551 		if (nd) {
    552 			getmicrotime(&nd->nd_starttime);
    553 			if (nd->nd_nam2)
    554 				nd->nd_nam = nd->nd_nam2;
    555 			else
    556 				nd->nd_nam = slp->ns_nam;
    557 
    558 			/*
    559 			 * Check to see if authorization is needed.
    560 			 */
    561 			if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
    562 				nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
    563 				nsd->nsd_haddr = mtod(nd->nd_nam,
    564 				    struct sockaddr_in *)->sin_addr.s_addr;
    565 				nsd->nsd_authlen = nfsd->nfsd_authlen;
    566 				nsd->nsd_verflen = nfsd->nfsd_verflen;
    567 				if (!copyout(nfsd->nfsd_authstr,
    568 				    nsd->nsd_authstr, nfsd->nfsd_authlen) &&
    569 				    !copyout(nfsd->nfsd_verfstr,
    570 				    nsd->nsd_verfstr, nfsd->nfsd_verflen) &&
    571 				    !copyout(nsd, argp, sizeof (*nsd))) {
    572 					uvm_lwp_rele(l);
    573 					return (ENEEDAUTH);
    574 				}
    575 				cacherep = RC_DROPIT;
    576 			} else
    577 				cacherep = nfsrv_getcache(nd, slp, &mreq);
    578 
    579 			if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
    580 				nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
    581 				nd->nd_procnum = NFSPROC_NOOP;
    582 				nd->nd_repstat =
    583 				    (NFSERR_AUTHERR | AUTH_TOOWEAK);
    584 				cacherep = RC_DOIT;
    585 			}
    586 		}
    587 
    588 		/*
    589 		 * Loop to get all the write rpc relies that have been
    590 		 * gathered together.
    591 		 */
    592 		do {
    593 			switch (cacherep) {
    594 			case RC_DOIT:
    595 				mreq = NULL;
    596 				netexport_rdlock();
    597 				if (writes_todo || nd == NULL ||
    598 				     (!(nd->nd_flag & ND_NFSV3) &&
    599 				     nd->nd_procnum == NFSPROC_WRITE &&
    600 				     nfsrvw_procrastinate > 0))
    601 					error = nfsrv_writegather(&nd, slp,
    602 					    l, &mreq);
    603 				else
    604 					error =
    605 					    (*(nfsrv3_procs[nd->nd_procnum]))
    606 					    (nd, slp, l, &mreq);
    607 				netexport_rdunlock();
    608 				if (mreq == NULL) {
    609 					if (nd != NULL) {
    610 						if (nd->nd_nam2)
    611 							m_free(nd->nd_nam2);
    612 					}
    613 					break;
    614 				}
    615 				if (error) {
    616 					nfsstats.srv_errs++;
    617 					nfsrv_updatecache(nd, false, mreq);
    618 					if (nd->nd_nam2)
    619 						m_freem(nd->nd_nam2);
    620 					break;
    621 				}
    622 				nfsstats.srvrpccnt[nd->nd_procnum]++;
    623 				nfsrv_updatecache(nd, true, mreq);
    624 				nd->nd_mrep = (struct mbuf *)0;
    625 			case RC_REPLY:
    626 				m = mreq;
    627 				siz = 0;
    628 				while (m) {
    629 					siz += m->m_len;
    630 					m = m->m_next;
    631 				}
    632 				if (siz <= 0 || siz > NFS_MAXPACKET) {
    633 					printf("mbuf siz=%d\n",siz);
    634 					panic("Bad nfs svc reply");
    635 				}
    636 				m = mreq;
    637 				m->m_pkthdr.len = siz;
    638 				m->m_pkthdr.rcvif = (struct ifnet *)0;
    639 				/*
    640 				 * For stream protocols, prepend a Sun RPC
    641 				 * Record Mark.
    642 				 */
    643 				if (sotype == SOCK_STREAM) {
    644 					M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
    645 					*mtod(m, u_int32_t *) =
    646 					    htonl(0x80000000 | siz);
    647 				}
    648 				nd->nd_mreq = m;
    649 				if (nfsrtton) {
    650 					nfsd_rt(slp->ns_so->so_type, nd,
    651 					    cacherep);
    652 				}
    653 				error = nfsdsock_sendreply(slp, nd);
    654 				nd = NULL;
    655 				if (error == EPIPE)
    656 					nfsrv_zapsock(slp);
    657 				if (error == EINTR || error == ERESTART) {
    658 					nfsd->nfsd_slp = NULL;
    659 					nfsrv_slpderef(slp);
    660 					goto done;
    661 				}
    662 				break;
    663 			case RC_DROPIT:
    664 				if (nfsrtton)
    665 					nfsd_rt(sotype, nd, cacherep);
    666 				m_freem(nd->nd_mrep);
    667 				m_freem(nd->nd_nam2);
    668 				break;
    669 			}
    670 			if (nd) {
    671 				nfsdreq_free(nd);
    672 				nd = NULL;
    673 			}
    674 
    675 			/*
    676 			 * Check to see if there are outstanding writes that
    677 			 * need to be serviced.
    678 			 */
    679 			getmicrotime(&tv);
    680 			cur_usec = (u_quad_t)tv.tv_sec * 1000000 +
    681 			    (u_quad_t)tv.tv_usec;
    682 			mutex_enter(&nfsd_lock);
    683 			if (LIST_FIRST(&slp->ns_tq) &&
    684 			    LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec) {
    685 				cacherep = RC_DOIT;
    686 				writes_todo = 1;
    687 			} else
    688 				writes_todo = 0;
    689 			mutex_exit(&nfsd_lock);
    690 		} while (writes_todo);
    691 		if (nfsrv_dorec(slp, nfsd, &nd, &dummy)) {
    692 			nfsd->nfsd_slp = NULL;
    693 			nfsrv_slpderef(slp);
    694 		}
    695 	}
    696 done:
    697 	mutex_enter(&nfsd_lock);
    698 	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
    699 	doreinit = --nfs_numnfsd == 0;
    700 	if (doreinit)
    701 		nfssvc_sockhead_flag |= SLP_INIT;
    702 	mutex_exit(&nfsd_lock);
    703 	cv_destroy(&nfsd->nfsd_cv);
    704 	kmem_free(nfsd, sizeof(*nfsd));
    705 	nsd->nsd_nfsd = NULL;
    706 	if (doreinit)
    707 		nfsrv_init(true);	/* Reinitialize everything */
    708 	uvm_lwp_rele(l);
    709 	return (error);
    710 }
    711 
    712 /*
    713  * Shut down a socket associated with an nfssvc_sock structure.
    714  * Should be called with the send lock set, if required.
    715  * The trick here is to increment the sref at the start, so that the nfsds
    716  * will stop using it and clear ns_flag at the end so that it will not be
    717  * reassigned during cleanup.
    718  *
    719  * called at splsoftnet.
    720  */
    721 void
    722 nfsrv_zapsock(struct nfssvc_sock *slp)
    723 {
    724 	struct nfsuid *nuidp, *nnuidp;
    725 	struct nfsrv_descript *nwp;
    726 	struct socket *so;
    727 	struct mbuf *m;
    728 
    729 	if (nfsdsock_drain(slp)) {
    730 		return;
    731 	}
    732 	mutex_enter(&nfsd_lock);
    733 	if (slp->ns_gflags & SLP_G_DOREC) {
    734 		TAILQ_REMOVE(&nfssvc_sockpending, slp, ns_pending);
    735 		slp->ns_gflags &= ~SLP_G_DOREC;
    736 	}
    737 	mutex_exit(&nfsd_lock);
    738 
    739 	so = slp->ns_so;
    740 	KASSERT(so != NULL);
    741 	solock(so);
    742 	so->so_upcall = NULL;
    743 	so->so_upcallarg = NULL;
    744 	so->so_rcv.sb_flags &= ~SB_UPCALL;
    745 	soshutdown(so, SHUT_RDWR);
    746 	sounlock(so);
    747 
    748 	m_freem(slp->ns_raw);
    749 	m = slp->ns_rec;
    750 	while (m != NULL) {
    751 		struct mbuf *n;
    752 
    753 		n = m->m_nextpkt;
    754 		m_freem(m);
    755 		m = n;
    756 	}
    757 	/* XXX what about freeing ns_frag ? */
    758 	for (nuidp = TAILQ_FIRST(&slp->ns_uidlruhead); nuidp != 0;
    759 	    nuidp = nnuidp) {
    760 		nnuidp = TAILQ_NEXT(nuidp, nu_lru);
    761 		LIST_REMOVE(nuidp, nu_hash);
    762 		TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
    763 		if (nuidp->nu_flag & NU_NAM)
    764 			m_freem(nuidp->nu_nam);
    765 		kmem_free(nuidp, sizeof(*nuidp));
    766 	}
    767 	mutex_enter(&nfsd_lock);
    768 	while ((nwp = LIST_FIRST(&slp->ns_tq)) != NULL) {
    769 		LIST_REMOVE(nwp, nd_tq);
    770 		mutex_exit(&nfsd_lock);
    771 		nfsdreq_free(nwp);
    772 		mutex_enter(&nfsd_lock);
    773 	}
    774 	mutex_exit(&nfsd_lock);
    775 }
    776 
    777 /*
    778  * Derefence a server socket structure. If it has no more references and
    779  * is no longer valid, you can throw it away.
    780  */
    781 void
    782 nfsrv_slpderef(struct nfssvc_sock *slp)
    783 {
    784 	uint32_t ref;
    785 
    786 	mutex_enter(&nfsd_lock);
    787 	KASSERT(slp->ns_sref > 0);
    788 	ref = --slp->ns_sref;
    789 	if (ref == 0 && (slp->ns_flags & SLP_VALID) == 0) {
    790 		file_t *fp;
    791 
    792 		KASSERT((slp->ns_gflags & SLP_G_DOREC) == 0);
    793 		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
    794 		mutex_exit(&nfsd_lock);
    795 
    796 		fp = slp->ns_fp;
    797 		if (fp != NULL) {
    798 			slp->ns_fp = NULL;
    799 			KASSERT(fp != NULL);
    800 			KASSERT(fp->f_data == slp->ns_so);
    801 			KASSERT(fp->f_count > 0);
    802 			closef(fp);
    803 			slp->ns_so = NULL;
    804 		}
    805 
    806 		if (slp->ns_nam)
    807 			m_free(slp->ns_nam);
    808 		nfsrv_sockfree(slp);
    809 	} else
    810 		mutex_exit(&nfsd_lock);
    811 }
    812 
    813 /*
    814  * Initialize the data structures for the server.
    815  * Handshake with any new nfsds starting up to avoid any chance of
    816  * corruption.
    817  */
    818 void
    819 nfsrv_init(int terminating)
    820 {
    821 	struct nfssvc_sock *slp;
    822 
    823 	if (!terminating) {
    824 		mutex_init(&nfsd_lock, MUTEX_DRIVER, IPL_SOFTNET);
    825 		cv_init(&nfsd_initcv, "nfsdinit");
    826 	}
    827 
    828 	mutex_enter(&nfsd_lock);
    829 	if (!terminating && (nfssvc_sockhead_flag & SLP_INIT) != 0)
    830 		panic("nfsd init");
    831 	nfssvc_sockhead_flag |= SLP_INIT;
    832 
    833 	if (terminating) {
    834 		KASSERT(SLIST_EMPTY(&nfsd_idle_head));
    835 		KASSERT(TAILQ_EMPTY(&nfsd_head));
    836 		while ((slp = TAILQ_FIRST(&nfssvc_sockhead)) != NULL) {
    837 			mutex_exit(&nfsd_lock);
    838 			KASSERT(slp->ns_sref == 0);
    839 			slp->ns_sref++;
    840 			nfsrv_zapsock(slp);
    841 			nfsrv_slpderef(slp);
    842 			mutex_enter(&nfsd_lock);
    843 		}
    844 		KASSERT(TAILQ_EMPTY(&nfssvc_sockpending));
    845 		mutex_exit(&nfsd_lock);
    846 		nfsrv_cleancache();	/* And clear out server cache */
    847 	} else {
    848 		mutex_exit(&nfsd_lock);
    849 		nfs_pub.np_valid = 0;
    850 	}
    851 
    852 	TAILQ_INIT(&nfssvc_sockhead);
    853 	TAILQ_INIT(&nfssvc_sockpending);
    854 
    855 	TAILQ_INIT(&nfsd_head);
    856 	SLIST_INIT(&nfsd_idle_head);
    857 	nfsd_head_flag &= ~NFSD_CHECKSLP;
    858 
    859 	nfs_udpsock = nfsrv_sockalloc();
    860 	nfs_udp6sock = nfsrv_sockalloc();
    861 
    862 	mutex_enter(&nfsd_lock);
    863 	nfssvc_sockhead_flag &= ~SLP_INIT;
    864 	cv_broadcast(&nfsd_initcv);
    865 	mutex_exit(&nfsd_lock);
    866 }
    867 
    868 void
    869 nfsrv_fini(void)
    870 {
    871 
    872 	nfsrv_init(true);
    873 	cv_destroy(&nfsd_initcv);
    874 	mutex_destroy(&nfsd_lock);
    875 }
    876 
    877 /*
    878  * Add entries to the server monitor log.
    879  */
    880 static void
    881 nfsd_rt(int sotype, struct nfsrv_descript *nd, int cacherep)
    882 {
    883 	struct timeval tv;
    884 	struct drt *rt;
    885 
    886 	rt = &nfsdrt.drt[nfsdrt.pos];
    887 	if (cacherep == RC_DOIT)
    888 		rt->flag = 0;
    889 	else if (cacherep == RC_REPLY)
    890 		rt->flag = DRT_CACHEREPLY;
    891 	else
    892 		rt->flag = DRT_CACHEDROP;
    893 	if (sotype == SOCK_STREAM)
    894 		rt->flag |= DRT_TCP;
    895 	if (nd->nd_flag & ND_NFSV3)
    896 		rt->flag |= DRT_NFSV3;
    897 	rt->proc = nd->nd_procnum;
    898 	if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET)
    899 	    rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr;
    900 	else
    901 	    rt->ipadr = INADDR_ANY;
    902 	getmicrotime(&tv);
    903 	rt->resptime = ((tv.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
    904 		(tv.tv_usec - nd->nd_starttime.tv_usec);
    905 	rt->tstamp = tv;
    906 	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
    907 }
    908