/*
 * Home | History | Annotate | Line # | Download | only in nfs
 * nfs_socket.c revision 1.8.2.2
 */
      1 /*
      2  * Copyright (c) 1989, 1991 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Rick Macklem at The University of Guelph.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	from: @(#)nfs_socket.c	7.23 (Berkeley) 4/20/91
     37  *	$Id: nfs_socket.c,v 1.8.2.2 1993/11/14 22:22:24 mycroft Exp $
     38  */
     39 
     40 /*
     41  * Socket operations for use by nfs
     42  */
     43 
     44 #include <sys/param.h>
     45 #include <sys/systm.h>
     46 #include <sys/proc.h>
     47 #include <sys/mount.h>
     48 #include <sys/kernel.h>
     49 #include <sys/malloc.h>
     50 #include <sys/mbuf.h>
     51 #include <sys/namei.h>
     52 #include <sys/vnode.h>
     53 #include <sys/domain.h>
     54 #include <sys/protosw.h>
     55 #include <sys/socket.h>
     56 #include <sys/socketvar.h>
     57 #include <sys/syslog.h>
     58 #include <sys/tprintf.h>
     59 
     60 #include <netinet/in.h>
     61 #include <netinet/tcp.h>
     62 
     63 #include <nfs/rpcv2.h>
     64 #include <nfs/nfsv2.h>
     65 #include <nfs/nfs.h>
     66 #include <nfs/xdr_subs.h>
     67 #include <nfs/nfsm_subs.h>
     68 #include <nfs/nfsmount.h>
     69 
     70 #include <machine/cpu.h>
     71 
#define	TRUE	1
#define	FALSE	0

/*
 * External data, mostly RPC constants in XDR form
 * (pre-encoded in network byte order so headers can be compared/built
 * without per-packet htonl calls).
 */
extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
	rpc_msgaccepted, rpc_call;
extern u_long nfs_prog, nfs_vers;
/* Maybe these should be bits in a u_long ?? */
/*
 * Static array that defines which nfs rpc's are nonidempotent
 * (indexed by NFSv2 procedure number).  Nonidempotent requests get a
 * minimum retransmit timeout in nfs_request() to reduce the chance of
 * a duplicated request being re-executed by the server.
 */
int nonidempotent[NFS_NPROCS] = {
	FALSE,	/* 0  NULL */
	FALSE,	/* 1  GETATTR */
	TRUE,	/* 2  SETATTR */
	FALSE,	/* 3  ROOT */
	FALSE,	/* 4  LOOKUP */
	FALSE,	/* 5  READLINK */
	FALSE,	/* 6  READ */
	FALSE,	/* 7  WRITECACHE */
	TRUE,	/* 8  WRITE */
	TRUE,	/* 9  CREATE */
	TRUE,	/* 10 REMOVE */
	TRUE,	/* 11 RENAME */
	TRUE,	/* 12 LINK */
	TRUE,	/* 13 SYMLINK */
	TRUE,	/* 14 MKDIR */
	TRUE,	/* 15 RMDIR */
	FALSE,	/* 16 READDIR */
	FALSE,	/* 17 STATFS */
};
/*
 * Which procedures may have their request compressed by nfs_compress()
 * when the mount has NFSMNT_COMPRESS set (same NFSv2 procedure-number
 * indexing as above).
 */
static int compressrequest[NFS_NPROCS] = {
	FALSE,	/* 0  NULL */
	TRUE,	/* 1  GETATTR */
	TRUE,	/* 2  SETATTR */
	FALSE,	/* 3  ROOT */
	TRUE,	/* 4  LOOKUP */
	TRUE,	/* 5  READLINK */
	TRUE,	/* 6  READ */
	FALSE,	/* 7  WRITECACHE */
	FALSE,	/* 8  WRITE */
	TRUE,	/* 9  CREATE */
	TRUE,	/* 10 REMOVE */
	TRUE,	/* 11 RENAME */
	TRUE,	/* 12 LINK */
	TRUE,	/* 13 SYMLINK */
	TRUE,	/* 14 MKDIR */
	TRUE,	/* 15 RMDIR */
	TRUE,	/* 16 READDIR */
	TRUE,	/* 17 STATFS */
};
/* Pre-ANSI forward declarations for routines used before definition. */
int	nfs_sbwait();
void	nfs_disconnect();
struct mbuf *nfs_compress(), *nfs_uncompress();


/* Head of the doubly-linked list of all outstanding NFS requests. */
struct nfsreq nfsreqh;
/* Retransmit-count threshold (NFS_FISHY); consumed by code outside this chunk. */
int nfsrexmtthresh = NFS_FISHY;
/* If non-zero, TCP mounts get TCP_NODELAY set in nfs_connect(). */
int nfs_tcpnodelay = 1;
    133 
    134 /*
    135  * Initialize sockets and congestion for a new NFS connection.
    136  * We do not free the sockaddr if error.
    137  */
/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * Creates the socket, binds AF_INET sockets to a reserved port (some
 * servers insist on one), reserves socket buffer space, connects unless
 * NFSMNT_NOCONN, sets timeouts/options, and seeds the mount point's
 * congestion variables.  Returns 0 or an errno; on error the partially
 * set up socket is torn down via nfs_disconnect().
 */
nfs_connect(nmp)
	register struct nfsmount *nmp;
{
	register struct socket *so;
	struct sockaddr *saddr;					/* 08 Sep 92*/
	int s, error, bufsize;
	struct mbuf *m;
	struct sockaddr_in *sin;				/* 08 Sep 92*/
	u_short tport;						/* 08 Sep 92*/

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);		/* 08 Sep 92*/
	if (error = socreate(saddr->sa_family,			/* 08 Sep 92*/
		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
		goto bad;
	so = nmp->nm_so;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * 08 Sep 92
	 *
	 * Some servers require that the client port be a reserved port number.
	 * Walk downward from IPPORT_RESERVED-1 while the port is in use.
	 * NOTE(review): sobind() failures other than EADDRINUSE -- and a
	 * final failure to bind at all -- are silently ignored here.
	 */
	if (saddr->sa_family == AF_INET) {
		MGET(m, M_WAIT, MT_SONAME);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin->sin_port = htons(tport);
		while (sobind(so, m) == EADDRINUSE &&
		       --tport > IPPORT_RESERVED / 2)
			sin->sin_port = htons(tport);
		m_freem(m);
	}

	/*
	 * Buffer space for 4 maximum-size requests; stream sockets also
	 * need room for the RPC record mark (a u_long) per request.
	 */
	if (nmp->nm_sotype == SOCK_DGRAM)
		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
		    NFS_MAXPACKET);
	else
		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
		    NFS_MAXPACKET + sizeof(u_long));
	if (error = soreserve(so, bufsize, bufsize))
		goto bad;

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		if (error = soconnect(so, nmp->nm_nam))
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait at negative prio.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
		splx(s);
		if (so->so_error) {
			error = so->so_error;
			goto bad;
		}
	}
	/*
	 * Interruptible/soft mounts get a 5 second socket buffer timeout
	 * so sleeps in sosend/soreceive wake up periodically; hard mounts
	 * block indefinitely.
	 */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
			so->so_rcv.sb_timeo = (5 * hz);
			so->so_snd.sb_timeo = (5 * hz);
		} else {
			so->so_rcv.sb_timeo = 0;
			so->so_snd.sb_timeo = 0;
		}
		nmp->nm_rto = NFS_TIMEO;
	} else {
		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
			so->so_rcv.sb_timeo = (5 * hz);
			so->so_snd.sb_timeo = (5 * hz);
		} else {
			so->so_rcv.sb_timeo = 0;
			so->so_snd.sb_timeo = 0;
		}
		/* Keepalives detect a dead server on connection-based sockets. */
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		/* Disable Nagle for TCP mounts unless tuned off. */
		if (so->so_proto->pr_domain->dom_family == AF_INET &&
		    so->so_proto->pr_protocol == IPPROTO_TCP &&
		    nfs_tcpnodelay) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
	}
	/* Socket buffer sleeps must not be broken by signals. */
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/* Initialize other non-zero congestion variables */
	nmp->nm_window = 2;			/* Initial send window */
	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
	nmp->nm_rttvar = nmp->nm_rto << 1;
	nmp->nm_sent = 0;
	nmp->nm_currexmit = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}
    258 
    259 /*
    260  * Reconnect routine:
    261  * Called when a connection is broken on a reliable protocol.
    262  * - clean up the old socket
    263  * - nfs_connect() again
    264  * - set R_MUSTRESEND for all outstanding requests on mount point
    265  * If this fails the mount point is DEAD!
    266  * nb: Must be called with the nfs_solock() set on the mount point.
    267  */
/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_solock() set on the mount point.
 *
 * Retries nfs_connect() forever, sleeping on lbolt between attempts;
 * returns EINTR only if the mount is interruptible (NFSMNT_INT) and a
 * signal is pending, otherwise 0 once reconnected.
 */
nfs_reconnect(rep, nmp)
	register struct nfsreq *rep;
	register struct nfsmount *nmp;
{
	register struct nfsreq *rp;
	int error;

	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
	    "trying reconnect");
	while (error = nfs_connect(nmp)) {
#ifdef lint
		error = error;
#endif /* lint */
		/* Give up only on an interruptible mount with a signal pending. */
		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
			return (EINTR);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}
	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
	    "reconnected");

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	rp = nfsreqh.r_next;
	while (rp != &nfsreqh) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
		rp = rp->r_next;
	}
	return (0);
}
    300 
    301 /*
    302  * NFS disconnect. Clean up and unlink.
    303  */
    304 void
    305 nfs_disconnect(nmp)
    306 	register struct nfsmount *nmp;
    307 {
    308 	register struct socket *so;
    309 
    310 	if (nmp->nm_so) {
    311 		so = nmp->nm_so;
    312 		nmp->nm_so = (struct socket *)0;
    313 		soshutdown(so, 2);
    314 		soclose(so);
    315 	}
    316 }
    317 
    318 /*
    319  * This is the nfs send routine. For connection based socket types, it
    320  * must be called with an nfs_solock() on the socket.
    321  * "rep == NULL" indicates that it has been called from a server.
    322  */
/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_solock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 *
 * Always consumes "top".  Most socket errors are deliberately mapped to
 * 0 so the caller's retransmit machinery handles recovery; only EINTR
 * and ERESTART propagate.
 */
nfs_send(so, nam, top, rep)
	register struct socket *so;
	struct mbuf *nam;
	register struct mbuf *top;
	struct nfsreq *rep;
{
	struct mbuf *sendnam;
	int error, soflags;

	if (rep) {
		/* Client request: soft-terminated requests go no further. */
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		/* Reconnect a dead mount socket before trying to send. */
		if (rep->r_nmp->nm_so == NULL &&
		    (error = nfs_reconnect(rep, rep->r_nmp)))
			return (error);
		rep->r_flags &= ~R_MUSTRESEND;
		/* nm_so may have changed across a reconnect; refetch it. */
		so = rep->r_nmp->nm_so;
		soflags = rep->r_nmp->nm_soflags;
	} else
		soflags = so->so_proto->pr_flags;
	/* Connected (or connection-oriented) sockets take no address. */
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = (struct mbuf *)0;
	else
		sendnam = nam;

	error = sosend(so, sendnam, (struct uio *)0, top,
		(struct mbuf *)0, 0);
	if (error == EWOULDBLOCK && rep) {
		if (rep->r_flags & R_SOFTTERM)
			error = EINTR;
		else {
			/* Send buffer full: let the timer resend later. */
			rep->r_flags |= R_MUSTRESEND;
			error = 0;
		}
	}
	/*
	 * Ignore socket errors??
	 */
	if (error && error != EINTR && error != ERESTART)
		error = 0;
	return (error);
}
    367 
    368 /*
    369  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    370  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    371  * Mark and consolidate the data into a new mbuf list.
    372  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    373  *     small mbufs.
    374  * For SOCK_STREAM we must be very careful to read an entire record once
    375  * we have read any of it, even if the system call has been interrupted.
    376  */
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *     small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 *
 * On success *mp holds the received record and, for unconnected
 * sockets, *aname the sender's address.  "rep == NULL" means we are
 * the server side; client-only retry/reconnect logic is then skipped.
 */
nfs_receive(so, aname, mp, rep)
	register struct socket *so;
	struct mbuf **aname;
	struct mbuf **mp;
	register struct nfsreq *rep;
{
	struct uio auio;
	struct iovec aio;
	register struct mbuf *m;
	struct mbuf *m2, *mnew, **mbp;
	caddr_t fcp, tcp;
	u_long len;
	struct mbuf **getnam;
	int error, siz, mlen, soflags, rcvflg;

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	if (rep)
		soflags = rep->r_nmp->nm_soflags;
	else
		soflags = so->so_proto->pr_flags;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (soflags & PR_CONNREQUIRED) {
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		if (rep) {
			/*
			 * Ugh: If a reconnect attempt just happened, nm_so
			 * would have changed. NULL indicates a failed
			 * attempt that has essentially shut down this
			 * mount point.
			 */
			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
				(rep->r_flags & R_SOFTTERM))
				return (EINTR);
			/* Resend our request first if it was marked for it. */
			while (rep->r_flags & R_MUSTRESEND) {
				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
				nfsstats.rpcretries++;
				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
					rep))
					goto errout;
			}
		}
		if ((soflags & PR_ATOMIC) == 0) {
			/*
			 * Stream socket: read the 4-byte RPC record mark
			 * into "len" first (MSG_WAITALL so we never stop
			 * with a partial mark).
			 */
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_long);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_procp = (struct proc *)0;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_long);
			do {
			    rcvflg = MSG_WAITALL;
			    error = soreceive(so, (struct mbuf **)0, &auio,
				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
			    if (error == EWOULDBLOCK && rep) {
				if (rep->r_flags & R_SOFTTERM)
					return (EINTR);
				if (rep->r_flags & R_MUSTRESEND)
					goto tryagain;
			    }
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
			    if (rep)
				log(LOG_INFO,
				   "short receive (%d/%d) from nfs server %s\n",
				   sizeof(u_long) - auio.uio_resid,
				   sizeof(u_long),
				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			    error = EPIPE;
			}
			if (error)
				goto errout;
			/* Record mark: high bit = last fragment, rest = length. */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
			    if (rep)
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			    error = EFBIG;
			    goto errout;
			}
			/*
			 * Now read the record body itself; persist through
			 * interrupts so the stream never desynchronizes.
			 */
			auio.uio_resid = len;
			do {
			    rcvflg = MSG_WAITALL;
			    error =  soreceive(so, (struct mbuf **)0,
				&auio, mp, (struct mbuf **)0, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
			if (!error && auio.uio_resid > 0) {
			    if (rep)
				log(LOG_INFO,
				   "short receive (%d/%d) from nfs server %s\n",
				   len - auio.uio_resid, len,
				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			    error = EPIPE;
			}
		} else {
			/*
			 * Connection-required but atomic protocol: one
			 * soreceive() returns a whole record.
			 */
			auio.uio_resid = len = 1000000;	/* Anything Big */
			do {
			    rcvflg = 0;
			    error =  soreceive(so, (struct mbuf **)0,
				&auio, mp, (struct mbuf **)0, &rcvflg);
			    if (error == EWOULDBLOCK && rep) {
				if (rep->r_flags & R_SOFTTERM)
					return (EINTR);
				if (rep->r_flags & R_MUSTRESEND)
					goto tryagain;
			    }
			} while (error == EWOULDBLOCK);
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		/*
		 * On a non-fatal receive error for a client request, force
		 * a disconnect/reconnect and start over.
		 */
		if (error && rep && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE && rep)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			nfs_disconnect(rep->r_nmp);
			error = nfs_reconnect(rep, rep->r_nmp);
			if (!error)
				goto tryagain;
		}
	} else {
		/* Datagram socket: a single receive gets a whole packet. */
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct mbuf **)0;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		do {
			rcvflg = 0;
			error =  soreceive(so, getnam, &auio, mp,
				(struct mbuf **)0, &rcvflg);
			if (error == EWOULDBLOCK && rep &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
	}
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	m = *mp;
	mbp = mp;
	while (m) {
		/*
		 * All this for something that may never happen.
		 */
		if (m->m_next && (m->m_len & 0x3)) {
			printf("nfs_rcv odd length!\n");
			mlen = 0;
			/* Copy the rest of the chain into fresh mbufs. */
			while (m) {
				fcp = mtod(m, caddr_t);
				while (m->m_len > 0) {
					if (mlen == 0) {
						MGET(m2, M_WAIT, MT_DATA);
						if (len >= MINCLSIZE)
							MCLGET(m2, M_WAIT);
						m2->m_len = 0;
						mlen = M_TRAILINGSPACE(m2);
						tcp = mtod(m2, caddr_t);
						*mbp = m2;
						mbp = &m2->m_next;
					}
					siz = MIN(mlen, m->m_len);
					bcopy(fcp, tcp, siz);
					m2->m_len += siz;
					mlen -= siz;
					len -= siz;
					tcp += siz;
					m->m_len -= siz;
					fcp += siz;
				}
				/* Free the source mbuf, advance to its successor. */
				MFREE(m, mnew);
				m = mnew;
			}
			break;
		}
		len -= m->m_len;
		mbp = &m->m_next;
		m = m->m_next;
	}
	return (error);
}
    592 
    593 /*
    594  * Implement receipt of reply on a socket.
    595  * We must search through the list of received datagrams matching them
    596  * with outstanding requests using the xid, until ours is found.
    597  */
    598 /* ARGSUSED */
/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 *
 * Any process blocked here may receive and match replies belonging to
 * other outstanding requests; it returns only once its own reply has
 * arrived (possibly delivered by another receiver) or a fatal error
 * occurs.
 */
/* ARGSUSED */
nfs_reply(nmp, myrep)
	struct nfsmount *nmp;
	struct nfsreq *myrep;
{
	register struct mbuf *m;
	register struct nfsreq *rep;
	register int error = 0;
	u_long rxid;
	struct mbuf *mp, *nam;
	char *cp;		/* XXX appears unused */
	int cnt, xfer;		/* XXX appear unused */

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		nfs_solock(&nmp->nm_flag);
		/* Already received, bye bye */
		if (myrep->r_mrep != NULL) {
			nfs_sounlock(&nmp->nm_flag);
			return (0);
		}
		/*
		 * Get the next Rpc reply off the socket
		 */
		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
			nfs_sounlock(&nmp->nm_flag);

			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
				continue;
			}

			/*
			 * Otherwise cleanup and return a fatal error.
			 * Undo the timing/congestion bookkeeping for this
			 * request so the mount's counters stay consistent.
			 */
			if (myrep->r_flags & R_TIMING) {
				myrep->r_flags &= ~R_TIMING;
				nmp->nm_rtt = -1;
			}
			if (myrep->r_flags & R_SENT) {
				myrep->r_flags &= ~R_SENT;
				nmp->nm_sent--;
			}
			return (error);
		}

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		m = mp;
		while (m && m->m_len == 0)
			m = m->m_next;
		if (m == NULL) {
			/* Empty record: count it as invalid and keep going. */
			nfsstats.rpcinvalid++;
			m_freem(mp);
			nfs_sounlock(&nmp->nm_flag);
			continue;
		}
		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		m = mp;
		rep = nfsreqh.r_next;
		while (rep != &nfsreqh) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = m;
				/*
				 * Update timing
				 */
				if (rep->r_flags & R_TIMING) {
					nfs_updatetimer(rep->r_nmp);
					rep->r_flags &= ~R_TIMING;
					rep->r_nmp->nm_rtt = -1;
				}
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_SENT;
					rep->r_nmp->nm_sent--;
				}
				break;
			}
			rep = rep->r_next;
		}
		nfs_sounlock(&nmp->nm_flag);
		if (nam)
			m_freem(nam);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == &nfsreqh) {
			nfsstats.rpcunexpected++;
			m_freem(m);
		} else if (rep == myrep)
			return (0);
	}
}
    708 
    709 /*
    710  * nfs_request - goes something like this
    711  *	- fill in request struct
    712  *	- links it into list
    713  *	- calls nfs_send() for first transmit
    714  *	- calls nfs_receive() to get reply
    715  *	- break down rpc header and return with nfs reply pointed to
    716  *	  by mrep or error
    717  * nb: always frees up mreq mbuf list
    718  */
/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 *
 * On success *mrp/*mdp/*dposp describe the reply past the RPC header
 * (mbuf chain, current mbuf, current position), ready for the nfsm_
 * dissection macros.  Note that t1/cp2/md/dpos and the nfsmout label
 * below are referenced implicitly by those macros.
 */
nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
	struct vnode *vp;
	struct mbuf *mreq;
	u_long xid;
	int procnum;
	struct proc *procp;
	int tryhard;
	struct mount *mp;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
{
	register struct mbuf *m, *mrep;
	register struct nfsreq *rep;
	register u_long *tl;
	register int len;
	struct nfsmount *nmp;
	struct mbuf *md;
	struct nfsreq *reph;
	caddr_t dpos;
	char *cp2;
	int t1;
	int s, compressed;
	int error = 0;

	nmp = VFSTONFS(mp);
	m = mreq;
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_xid = xid;
	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	/*
	 * Soft mounts (and spongy ones not trying hard) give up after
	 * nm_retry attempts; otherwise retry essentially forever.
	 */
	if ((nmp->nm_flag & NFSMNT_SOFT) ||
	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_flags = rep->r_rexmit = 0;
	/*
	 * Three cases:
	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
	 * - idempotent requests on SOCK_DGRAM use 0
	 * - Reliable transports, NFS_RELIABLETIMEO
	 *   Timeouts are still done on reliable transports to ensure detection
	 *   of excessive connection delay.
	 */
	if (nmp->nm_sotype != SOCK_DGRAM)
		rep->r_timerinit = -NFS_RELIABLETIMEO;
	else if (nonidempotent[procnum])
		rep->r_timerinit = -NFS_MINIDEMTIMEO;
	else
		rep->r_timerinit = 0;
	rep->r_timer = rep->r_timerinit;
	rep->r_mrep = NULL;
	/* Compute total request length and fake up a packet header. */
	len = 0;
	while (m) {
		len += m->m_len;
		m = m->m_next;
	}
	mreq->m_pkthdr.len = len;
	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
	compressed = 0;
	m = mreq;
	/*
	 * Optionally compress eligible requests; nfs_compress() returning
	 * a different chain head means compression actually happened.
	 */
	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
		mreq = nfs_compress(mreq);
		if (mreq != m) {
			len = mreq->m_pkthdr.len;
			compressed++;
		}
	}
	/*
	 * For non-atomic protocols, insert a Sun RPC Record Mark.
	 * (High bit set = last fragment, low 31 bits = length.)
	 */
	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
	}
	rep->r_mreq = mreq;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splnet();
	reph = &nfsreqh;
	reph->r_prev->r_next = rep;
	rep->r_prev = reph->r_prev;
	reph->r_prev = rep;
	rep->r_next = reph;
	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
		nmp->nm_sent++;
		rep->r_flags |= R_SENT;
		/* Start round-trip timing if nothing else is being timed. */
		if (nmp->nm_rtt == -1) {
			nmp->nm_rtt = 0;
			rep->r_flags |= R_TIMING;
		}
		splx(s);
		/* Send a copy; the original is kept for retransmits. */
		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			nfs_solock(&nmp->nm_flag);
		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			nfs_sounlock(&nmp->nm_flag);
		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
			nmp->nm_so->so_error = error = 0;
	} else
		splx(s);

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error)
		error = nfs_reply(nmp, rep);

	/*
	 * RPC done, unlink the request.
	 */
	s = splnet();
	rep->r_prev->r_next = rep->r_next;
	rep->r_next->r_prev = rep->r_prev;
	splx(s);

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	m_freem(rep->r_mreq);
	mrep = rep->r_mrep;
	FREE((caddr_t)rep, M_NFSREQ);
	if (error)
		return (error);

	/* If the request went out compressed, the reply is too. */
	if (compressed)
		mrep = nfs_uncompress(mrep);
	md = mrep;
	/*
	 * break down the rpc header and check if ok
	 * (first 5 words: xid, direction, reply status, verifier
	 * flavor, verifier length)
	 */
	dpos = mtod(md, caddr_t);
	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
	tl += 2;
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else
			error = EACCES;
		m_freem(mrep);
		return (error);
	}
	/*
	 * skip over the auth_verf, someday we may want to cache auth_short's
	 * for nfs_reqhead(), but for now just dump it
	 */
	if (*++tl != 0) {
		len = nfsm_rndup(fxdr_unsigned(long, *tl));
		nfsm_adv(len);
	}
	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		/* Accepted: the next word is the NFS status for the call. */
		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			m_freem(mrep);
			return (error);
		}
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		return (0);
	}
	m_freem(mrep);
	return (EPROTONOSUPPORT);
nfsmout:
	/* Reached via the nfsm_ dissection macros on a parse error. */
	return (error);
}
    908 
    909 /*
    910  * Get a request for the server main loop
    911  * - receive a request via. nfs_soreceive()
    912  * - verify it
    913  * - fill in the cred struct.
    914  */
    915 nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
    916 	msk, mtch, wascomp, repstat)				/* 08 Aug 92*/
    917 	struct socket *so;
    918 	u_long prog;
    919 	u_long vers;
    920 	int maxproc;
    921 	struct mbuf **nam;
    922 	struct mbuf **mrp;
    923 	struct mbuf **mdp;
    924 	caddr_t *dposp;
    925 	u_long *retxid;
    926 	u_long *procnum;
    927 	register struct ucred *cr;
    928 	struct mbuf *msk, *mtch;
    929 	int *wascomp, *repstat;					/* 08 Aug 92*/
    930 {
    931 	register int i;
    932 	register u_long *tl;
    933 	register long t1;
    934 	caddr_t dpos, cp2;
    935 	int error = 0;
    936 	struct mbuf *mrep, *md;
    937 	int len;
    938 
    939 	*repstat = 0;						/* 08 Aug 92*/
    940 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    941 		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    942 	} else {
    943 		mrep = (struct mbuf *)0;
    944 		do {
    945 			if (mrep) {
    946 				m_freem(*nam);
    947 				m_freem(mrep);
    948 			}
    949 			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    950 		} while (!error && nfs_badnam(*nam, msk, mtch));
    951 	}
    952 	if (error)
    953 		return (error);
    954 	md = mrep;
    955 	mrep = nfs_uncompress(mrep);
    956 	if (mrep != md) {
    957 		*wascomp = 1;
    958 		md = mrep;
    959 	} else
    960 		*wascomp = 0;
    961 	dpos = mtod(mrep, caddr_t);
    962 	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
    963 	*retxid = fxdr_unsigned(u_long, *tl++);
    964 	if (*tl++ != rpc_call || *tl++ != rpc_vers) {		/* 08 Aug 92*/
    965 		*mrp = mrep;
    966 		*procnum = NFSPROC_NOOP;
    967 		*repstat = ERPCMISMATCH;
    968 		return (0);
    969 	}
    970 	if (*tl++ != prog) {
    971 		*mrp = mrep;					/* 08 Aug 92*/
    972 		*procnum = NFSPROC_NOOP;
    973 		*repstat = EPROGUNAVAIL;
    974 		return (0);
    975 	}
    976 	if (*tl++ != vers) {
    977 		*mrp = mrep;					/* 08 Aug 92*/
    978 		*procnum = NFSPROC_NOOP;
    979 		*repstat = EPROGMISMATCH;
    980 		return (0);
    981 	}
    982 	*procnum = fxdr_unsigned(u_long, *tl++);
    983 	if (*procnum == NFSPROC_NULL) {
    984 		*mrp = mrep;
    985 		return (0);
    986 	}
    987 	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
    988 		*mrp = mrep;					/* 08 Aug 92*/
    989 		*procnum = NFSPROC_NOOP;
    990 		*repstat = EPROCUNAVAIL;
    991 		return (0);
    992 	}
    993 	len = fxdr_unsigned(int, *tl++);
    994 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
    995 		m_freem(mrep);
    996 		return (EBADRPC);
    997 	}
    998 	len = fxdr_unsigned(int, *++tl);
    999 	if (len < 0 || len > NFS_MAXNAMLEN) {
   1000 		m_freem(mrep);
   1001 		return (EBADRPC);
   1002 	}
   1003 	nfsm_adv(nfsm_rndup(len));
   1004 	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
   1005 	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
   1006 	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
   1007 	len = fxdr_unsigned(int, *tl);
   1008 	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
   1009 		m_freem(mrep);
   1010 		return (EBADRPC);
   1011 	}
   1012 	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
   1013 	for (i = 1; i <= len; i++)
   1014 		if (i < NGROUPS)
   1015 			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
   1016 		else
   1017 			tl++;
   1018 	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
   1019 	/*
   1020 	 * Do we have any use for the verifier.
   1021 	 * According to the "Remote Procedure Call Protocol Spec." it
   1022 	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
   1023 	 * For now, just skip over it
   1024 	 */
   1025 	len = fxdr_unsigned(int, *++tl);
   1026 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
   1027 		m_freem(mrep);
   1028 		return (EBADRPC);
   1029 	}
   1030 	if (len > 0)
   1031 		nfsm_adv(nfsm_rndup(len));
   1032 	*mrp = mrep;
   1033 	*mdp = md;
   1034 	*dposp = dpos;
   1035 	return (0);
   1036 nfsmout:
   1037 	return (error);
   1038 }
   1039 
   1040 /*
   1041  * Generate the rpc reply header
   1042  * siz arg. is used to decide if adding a cluster is worthwhile
   1043  */
   1044 nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
   1045 	int siz;
   1046 	u_long retxid;
   1047 	int err;
   1048 	struct mbuf **mrq;
   1049 	struct mbuf **mbp;
   1050 	caddr_t *bposp;
   1051 {
   1052 	register u_long *tl;
   1053 	register long t1;
   1054 	caddr_t bpos;
   1055 	struct mbuf *mreq, *mb, *mb2;
   1056 
   1057 	NFSMGETHDR(mreq);
   1058 	mb = mreq;
   1059 	if ((siz+RPC_REPLYSIZ) > MHLEN)
   1060 		MCLGET(mreq, M_WAIT);
   1061 	tl = mtod(mreq, u_long *);
   1062 	mreq->m_len = 6*NFSX_UNSIGNED;
   1063 	bpos = ((caddr_t)tl)+mreq->m_len;
   1064 	*tl++ = txdr_unsigned(retxid);
   1065 	*tl++ = rpc_reply;
   1066 	if (err == ERPCMISMATCH) {
   1067 		*tl++ = rpc_msgdenied;
   1068 		*tl++ = rpc_mismatch;
   1069 		*tl++ = txdr_unsigned(2);
   1070 		*tl = txdr_unsigned(2);
   1071 	} else {
   1072 		*tl++ = rpc_msgaccepted;
   1073 		*tl++ = 0;
   1074 		*tl++ = 0;
   1075 		switch (err) {
   1076 		case EPROGUNAVAIL:
   1077 			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
   1078 			break;
   1079 		case EPROGMISMATCH:
   1080 			*tl = txdr_unsigned(RPC_PROGMISMATCH);
   1081 			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
   1082 			*tl++ = txdr_unsigned(2);
   1083 			*tl = txdr_unsigned(2);	/* someday 3 */
   1084 			break;
   1085 		case EPROCUNAVAIL:
   1086 			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
   1087 			break;
   1088 		default:
   1089 			*tl = 0;
   1090 			if (err != VNOVAL) {
   1091 				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
   1092 				*tl = txdr_unsigned(err);
   1093 			}
   1094 			break;
   1095 		};
   1096 	}
   1097 	*mrq = mreq;
   1098 	*mbp = mb;
   1099 	*bposp = bpos;
   1100 	if (err != 0 && err != VNOVAL)
   1101 		nfsstats.srvrpc_errs++;
   1102 	return (0);
   1103 }
   1104 
   1105 /*
   1106  * Nfs timer routine
   1107  * Scan the nfsreq list and retranmit any requests that have timed out
   1108  * To avoid retransmission attempts on STREAM sockets (in the future) make
   1109  * sure to set the r_retry field to 0 (implies nm_retry == 0).
   1110  */
   1111 void
   1112 nfs_timer()
   1113 {
   1114 	register struct nfsreq *rep;
   1115 	register struct mbuf *m;
   1116 	register struct socket *so;
   1117 	register struct nfsmount *nmp;
   1118 	int s, error;
   1119 
   1120 	s = splnet();
   1121 	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
   1122 		nmp = rep->r_nmp;
   1123 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
   1124 		    (so = nmp->nm_so) == NULL)
   1125 			continue;
   1126 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
   1127 			rep->r_flags |= R_SOFTTERM;
   1128 			continue;
   1129 		}
   1130 		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
   1131 			nmp->nm_rtt++;
   1132 		/* If not timed out */
   1133 		if (++rep->r_timer < nmp->nm_rto)
   1134 			continue;
   1135 		/* Do backoff and save new timeout in mount */
   1136 		if (rep->r_flags & R_TIMING) {
   1137 			nfs_backofftimer(nmp);
   1138 			rep->r_flags &= ~R_TIMING;
   1139 			nmp->nm_rtt = -1;
   1140 		}
   1141 		if (rep->r_flags & R_SENT) {
   1142 			rep->r_flags &= ~R_SENT;
   1143 			nmp->nm_sent--;
   1144 		}
   1145 
   1146 		/*
   1147 		 * Check for too many retries on soft mount.
   1148 		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
   1149 		 */
   1150 		if (++rep->r_rexmit > NFS_MAXREXMIT)
   1151 			rep->r_rexmit = NFS_MAXREXMIT;
   1152 
   1153 		/*
   1154 		 * Check for server not responding
   1155 		 */
   1156 		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
   1157 		     rep->r_rexmit > NFS_FISHY) {
   1158 			nfs_msg(rep->r_procp,
   1159 			    nmp->nm_mountp->mnt_stat.f_mntfromname,
   1160 			    "not responding");
   1161 			rep->r_flags |= R_TPRINTFMSG;
   1162 		}
   1163 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
   1164 			nfsstats.rpctimeouts++;
   1165 			rep->r_flags |= R_SOFTTERM;
   1166 			continue;
   1167 		}
   1168 		if (nmp->nm_sotype != SOCK_DGRAM)
   1169 			continue;
   1170 
   1171 		/*
   1172 		 * If there is enough space and the window allows..
   1173 		 *	Resend it
   1174 		 */
   1175 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
   1176 		       nmp->nm_sent < nmp->nm_window &&
   1177 		       (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
   1178 			nfsstats.rpcretries++;
   1179 			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
   1180 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1181 			    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
   1182 			else
   1183 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1184 			    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
   1185 			if (error) {
   1186 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
   1187 					so->so_error = 0;
   1188 			} else {
   1189 				/*
   1190 				 * We need to time the request even though we
   1191 				 * are retransmitting.
   1192 				 */
   1193 				nmp->nm_rtt = 0;
   1194 				nmp->nm_sent++;
   1195 				rep->r_flags |= (R_SENT|R_TIMING);
   1196 				rep->r_timer = rep->r_timerinit;
   1197 			}
   1198 		}
   1199 	}
   1200 	splx(s);
   1201 	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
   1202 }
   1203 
   1204 /*
   1205  * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
   1206  * used here. The timer state is held in the nfsmount structure and
   1207  * a single request is used to clock the response. When successful
   1208  * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
   1209  * is done by nfs_backofftimer. We also log failure messages in these
   1210  * routines.
   1211  *
   1212  * Congestion variables are held in the nfshost structure which
   1213  * is referenced by nfsmounts and shared per-server. This separation
   1214  * makes it possible to do per-mount timing which allows varying disk
   1215  * access times to be dealt with, while preserving a network oriented
   1216  * congestion control scheme.
   1217  *
   1218  * The windowing implements the Jacobson/Karels slowstart algorithm
   1219  * with adjusted scaling factors. We start with one request, then send
   1220  * 4 more after each success until the ssthresh limit is reached, then
   1221  * we increment at a rate proportional to the window. On failure, we
   1222  * remember 3/4 the current window and clamp the send limit to 1. Note
   1223  * ICMP source quench is not reflected in so->so_error so we ignore that
   1224  * for now.
   1225  *
   1226  * NFS behaves much more like a transport protocol with these changes,
   1227  * shedding the teenage pedal-to-the-metal tendencies of "other"
   1228  * implementations.
   1229  *
   1230  * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
   1231  */
   1232 
   1233 /*
   1234  * The TCP algorithm was not forgiving enough. Because the NFS server
   1235  * responds only after performing lookups/diskio/etc, we have to be
   1236  * more prepared to accept a spiky variance. The TCP algorithm is:
   1237  * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
   1238  */
   1239 #define NFS_RTO(nmp)	(((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
   1240 
   1241 nfs_updatetimer(nmp)
   1242 	register struct nfsmount *nmp;
   1243 {
   1244 
   1245 	/* If retransmitted, clear and return */
   1246 	if (nmp->nm_rexmit || nmp->nm_currexmit) {
   1247 		nmp->nm_rexmit = nmp->nm_currexmit = 0;
   1248 		return;
   1249 	}
   1250 	/* If have a measurement, do smoothing */
   1251 	if (nmp->nm_srtt) {
   1252 		register short delta;
   1253 		delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
   1254 		if ((nmp->nm_srtt += delta) <= 0)
   1255 			nmp->nm_srtt = 1;
   1256 		if (delta < 0)
   1257 			delta = -delta;
   1258 		delta -= (nmp->nm_rttvar >> 2);
   1259 		if ((nmp->nm_rttvar += delta) <= 0)
   1260 			nmp->nm_rttvar = 1;
   1261 	/* Else initialize */
   1262 	} else {
   1263 		nmp->nm_rttvar = nmp->nm_rtt << 1;
   1264 		if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
   1265 		nmp->nm_srtt = nmp->nm_rttvar << 2;
   1266 	}
   1267 	/* Compute new Retransmission TimeOut and clip */
   1268 	nmp->nm_rto = NFS_RTO(nmp);
   1269 	if (nmp->nm_rto < NFS_MINTIMEO)
   1270 		nmp->nm_rto = NFS_MINTIMEO;
   1271 	else if (nmp->nm_rto > NFS_MAXTIMEO)
   1272 		nmp->nm_rto = NFS_MAXTIMEO;
   1273 
   1274 	/* Update window estimate */
   1275 	if (nmp->nm_window < nmp->nm_ssthresh)	/* quickly */
   1276 		nmp->nm_window += 4;
   1277 	else {						/* slowly */
   1278 		register long incr = ++nmp->nm_winext;
   1279 		incr = (incr * incr) / nmp->nm_window;
   1280 		if (incr > 0) {
   1281 			nmp->nm_winext = 0;
   1282 			++nmp->nm_window;
   1283 		}
   1284 	}
   1285 	if (nmp->nm_window > NFS_MAXWINDOW)
   1286 		nmp->nm_window = NFS_MAXWINDOW;
   1287 }
   1288 
   1289 nfs_backofftimer(nmp)
   1290 	register struct nfsmount *nmp;
   1291 {
   1292 	register unsigned long newrto;
   1293 
   1294 	/* Clip shift count */
   1295 	if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
   1296 		nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
   1297 	/* Back off RTO exponentially */
   1298 	newrto = NFS_RTO(nmp);
   1299 	newrto <<= (nmp->nm_rexmit - 1);
   1300 	if (newrto == 0 || newrto > NFS_MAXTIMEO)
   1301 		newrto = NFS_MAXTIMEO;
   1302 	nmp->nm_rto = newrto;
   1303 
   1304 	/* If too many retries, message, assume a bogus RTT and re-measure */
   1305 	if (nmp->nm_currexmit < nmp->nm_rexmit) {
   1306 		nmp->nm_currexmit = nmp->nm_rexmit;
   1307 		if (nmp->nm_currexmit >= nfsrexmtthresh) {
   1308 			if (nmp->nm_currexmit == nfsrexmtthresh) {
   1309 				nmp->nm_rttvar += (nmp->nm_srtt >> 2);
   1310 				nmp->nm_srtt = 0;
   1311 			}
   1312 		}
   1313 	}
   1314 	/* Close down window but remember this point (3/4 current) for later */
   1315 	nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
   1316 	nmp->nm_window = 1;
   1317 	nmp->nm_winext = 0;
   1318 }
   1319 
   1320 /*
   1321  * Test for a termination signal pending on procp.
   1322  * This is used for NFSMNT_INT mounts.
   1323  */
   1324 nfs_sigintr(p)
   1325 	register struct proc *p;
   1326 {
   1327 	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
   1328 	    NFSINT_SIGMASK))
   1329 		return (1);
   1330 	else
   1331 		return (0);
   1332 }
   1333 
   1334 nfs_msg(p, server, msg)
   1335 	struct proc *p;
   1336 	char *server, *msg;
   1337 {
   1338 	tpr_t tpr;
   1339 
   1340 	if (p)
   1341 		tpr = tprintf_open(p);
   1342 	else
   1343 		tpr = NULL;
   1344 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1345 	tprintf_close(tpr);
   1346 }
   1347 
   1348 /*
   1349  * Lock a socket against others.
   1350  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1351  * and also to avoid race conditions between the processes with nfs requests
   1352  * in progress when a reconnect is necessary.
   1353  */
   1354 nfs_solock(flagp)
   1355 	register int *flagp;
   1356 {
   1357 
   1358 	while (*flagp & NFSMNT_SCKLOCK) {
   1359 		*flagp |= NFSMNT_WANTSCK;
   1360 		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
   1361 	}
   1362 	*flagp |= NFSMNT_SCKLOCK;
   1363 }
   1364 
   1365 /*
   1366  * Unlock the stream socket for others.
   1367  */
   1368 nfs_sounlock(flagp)
   1369 	register int *flagp;
   1370 {
   1371 
   1372 	if ((*flagp & NFSMNT_SCKLOCK) == 0)
   1373 		panic("nfs sounlock");
   1374 	*flagp &= ~NFSMNT_SCKLOCK;
   1375 	if (*flagp & NFSMNT_WANTSCK) {
   1376 		*flagp &= ~NFSMNT_WANTSCK;
   1377 		wakeup((caddr_t)flagp);
   1378 	}
   1379 }
   1380 
   1381 /*
   1382  * This function compares two net addresses by family and returns TRUE
   1383  * if they are the same.
   1384  * If there is any doubt, return FALSE.
   1385  */
   1386 nfs_netaddr_match(nam1, nam2)
   1387 	struct mbuf *nam1, *nam2;
   1388 {
   1389 	register struct sockaddr *saddr1, *saddr2;
   1390 
   1391 	saddr1 = mtod(nam1, struct sockaddr *);
   1392 	saddr2 = mtod(nam2, struct sockaddr *);
   1393 	if (saddr1->sa_family != saddr2->sa_family)
   1394 		return (0);
   1395 
   1396 	/*
   1397 	 * Must do each address family separately since unused fields
   1398 	 * are undefined values and not always zeroed.
   1399 	 */
   1400 	switch (saddr1->sa_family) {
   1401 	case AF_INET:
   1402 		if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
   1403 		    ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
   1404 			return (1);
   1405 		break;
   1406 	default:
   1407 		break;
   1408 	};
   1409 	return (0);
   1410 }
   1411 
   1412 /*
   1413  * Check the hostname fields for nfsd's mask and match fields.
   1414  * By address family:
   1415  * - Bitwise AND the mask with the host address field
   1416  * - Compare for == with match
   1417  * return TRUE if not equal
   1418  */
   1419 nfs_badnam(nam, msk, mtch)
   1420 	register struct mbuf *nam, *msk, *mtch;
   1421 {
   1422 	switch (mtod(nam, struct sockaddr *)->sa_family) {
   1423 	case AF_INET:
   1424 		return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
   1425 			 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
   1426 			 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
   1427 	default:
   1428 		printf("nfs_badmatch, unknown sa_family\n");
   1429 		return (0);
   1430 	};
   1431 }
   1432