nfs_socket.c revision 1.11
      1 /*
      2  * Copyright (c) 1989, 1991 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Rick Macklem at The University of Guelph.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	from: @(#)nfs_socket.c	7.23 (Berkeley) 4/20/91
     37  *	$Id: nfs_socket.c,v 1.11 1994/04/10 06:45:59 cgd Exp $
     38  */
     39 
     40 /*
     41  * Socket operations for use by nfs
     42  */
     43 
     44 #include <sys/param.h>
     45 #include <sys/systm.h>
     46 #include <sys/proc.h>
     47 #include <sys/mount.h>
     48 #include <sys/kernel.h>
     49 #include <sys/malloc.h>
     50 #include <sys/mbuf.h>
     51 #include <sys/namei.h>
     52 #include <sys/vnode.h>
     53 #include <sys/domain.h>
     54 #include <sys/protosw.h>
     55 #include <sys/socket.h>
     56 #include <sys/socketvar.h>
     57 #include <sys/syslog.h>
     58 #include <sys/tprintf.h>
     59 
     60 #include <netinet/in.h>
     61 #include <netinet/tcp.h>
     62 
     63 #include <nfs/rpcv2.h>
     64 #include <nfs/nfsv2.h>
     65 #include <nfs/nfs.h>
     66 #include <nfs/xdr_subs.h>
     67 #include <nfs/nfsm_subs.h>
     68 #include <nfs/nfsmount.h>
     69 
     70 #define	TRUE	1
     71 #define	FALSE	0
     72 
     73 /*
     74  * External data, mostly RPC constants in XDR form
     75  */
     76 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
     77 	rpc_msgaccepted, rpc_call;
     78 extern u_long nfs_prog, nfs_vers;
     79 /* Maybe these should be bits in a u_long ?? */
     80 /*
     81  * Static array, indexed by NFSv2 procedure number, that defines which nfs rpc's are nonidempotent
     82  */
     83 int nonidempotent[NFS_NPROCS] = {
     84 	FALSE,
     85 	FALSE,
     86 	TRUE,
     87 	FALSE,
     88 	FALSE,
     89 	FALSE,
     90 	FALSE,
     91 	FALSE,
     92 	TRUE,
     93 	TRUE,
     94 	TRUE,
     95 	TRUE,
     96 	TRUE,
     97 	TRUE,
     98 	TRUE,
     99 	TRUE,
    100 	FALSE,
    101 	FALSE,
    102 };
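        /*
         * Static array, likewise indexed by NFSv2 procedure number, that
         * defines which nfs rpc requests are run through nfs_compress()
         * when the mount has NFSMNT_COMPRESS set (see nfs_request()).
         */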
    103 static int compressrequest[NFS_NPROCS] = {
    104 	FALSE,
    105 	TRUE,
    106 	TRUE,
    107 	FALSE,
    108 	TRUE,
    109 	TRUE,
    110 	TRUE,
    111 	FALSE,
    112 	FALSE,
    113 	TRUE,
    114 	TRUE,
    115 	TRUE,
    116 	TRUE,
    117 	TRUE,
    118 	TRUE,
    119 	TRUE,
    120 	TRUE,
    121 	TRUE,
    122 };
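        /*
         * A minimal sketch (not compiled in, and not part of the original
         * code) of the "bits in a u_long" alternative mused about above:
         * the NFSv2 procedure number becomes a bit position, so each table
         * collapses into a single mask.  NFSNONIDEMPOTENT simply mirrors
         * the TRUE entries of nonidempotent[].
         */
        #if 0
        #define	NFSNONIDEMPOTENT \
        	((1 << 2) | (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) | \
        	 (1 << 12) | (1 << 13) | (1 << 14) | (1 << 15))
        #define	NFSISNONIDEMPOTENT(procnum) \
        	((NFSNONIDEMPOTENT >> (procnum)) & 1)
        #endif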
    123 int	nfs_sbwait();
    124 void	nfs_disconnect();
    125 struct mbuf *nfs_compress(), *nfs_uncompress();
    126 
    127 
    128 struct nfsreq nfsreqh;
    129 int nfsrexmtthresh = NFS_FISHY;
    130 int nfs_tcpnodelay = 1;
    131 
    132 /*
    133  * Initialize sockets and congestion for a new NFS connection.
    134  * We do not free the sockaddr on error.
    135  */
    136 nfs_connect(nmp)
    137 	register struct nfsmount *nmp;
    138 {
    139 	register struct socket *so;
    140 	struct sockaddr *saddr;
    141 	int s, error, bufsize;
    142 	struct mbuf *m;
    143 	struct sockaddr_in *sin;
    144 	u_short tport;
    145 
    146 	nmp->nm_so = (struct socket *)0;
    147 	saddr = mtod(nmp->nm_nam, struct sockaddr *);
    148 	if (error = socreate(saddr->sa_family,
    149 		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
    150 		goto bad;
    151 	so = nmp->nm_so;
    152 	nmp->nm_soflags = so->so_proto->pr_flags;
    153 
    154 	/*
    155 	 * Some servers require that the client port be a reserved port number.
    156 	 */
    157 	if (saddr->sa_family == AF_INET) {
    158 		MGET(m, M_WAIT, MT_SONAME);
    159 		sin = mtod(m, struct sockaddr_in *);
    160 		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
    161 		sin->sin_family = AF_INET;
    162 		sin->sin_addr.s_addr = INADDR_ANY;
    163 		tport = IPPORT_RESERVED - 1;
    164 		sin->sin_port = htons(tport);
    165 		while (sobind(so, m) == EADDRINUSE &&
    166 		       --tport > IPPORT_RESERVED / 2)
    167 			sin->sin_port = htons(tport);
    168 		m_freem(m);
    169 	}
    170 
    171 	if (nmp->nm_sotype == SOCK_DGRAM)
    172 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
    173 		    NFS_MAXPACKET);
    174 	else
    175 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
    176 		    NFS_MAXPACKET + sizeof(u_long));
    177 	if (error = soreserve(so, bufsize, bufsize))
    178 		goto bad;
    179 
    180 	/*
    181 	 * Protocols that do not require connections may be optionally left
    182 	 * unconnected for servers that reply from a port other than NFS_PORT.
    183 	 */
    184 	if (nmp->nm_flag & NFSMNT_NOCONN) {
    185 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
    186 			error = ENOTCONN;
    187 			goto bad;
    188 		}
    189 	} else {
    190 		if (error = soconnect(so, nmp->nm_nam))
    191 			goto bad;
    192 
    193 		/*
    194 		 * Wait for the connection to complete. Cribbed from the
    195 		 * connect system call but with the wait at negative prio.
    196 		 */
    197 		s = splnet();
    198 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
    199 			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
    200 		splx(s);
    201 		if (so->so_error) {
    202 			error = so->so_error;
    203 			goto bad;
    204 		}
    205 	}
    206 	if (nmp->nm_sotype == SOCK_DGRAM) {
    207 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    208 			so->so_rcv.sb_timeo = (5 * hz);
    209 			so->so_snd.sb_timeo = (5 * hz);
    210 		} else {
    211 			so->so_rcv.sb_timeo = 0;
    212 			so->so_snd.sb_timeo = 0;
    213 		}
    214 		nmp->nm_rto = NFS_TIMEO;
    215 	} else {
    216 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    217 			so->so_rcv.sb_timeo = (5 * hz);
    218 			so->so_snd.sb_timeo = (5 * hz);
    219 		} else {
    220 			so->so_rcv.sb_timeo = 0;
    221 			so->so_snd.sb_timeo = 0;
    222 		}
    223 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    224 			MGET(m, M_WAIT, MT_SOOPTS);
    225 			*mtod(m, int *) = 1;
    226 			m->m_len = sizeof(int);
    227 			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
    228 		}
    229 		if (so->so_proto->pr_domain->dom_family == AF_INET &&
    230 		    so->so_proto->pr_protocol == IPPROTO_TCP &&
    231 		    nfs_tcpnodelay) {
    232 			MGET(m, M_WAIT, MT_SOOPTS);
    233 			*mtod(m, int *) = 1;
    234 			m->m_len = sizeof(int);
    235 			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
    236 		}
    237 		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
    238 	}
    239 	so->so_rcv.sb_flags |= SB_NOINTR;
    240 	so->so_snd.sb_flags |= SB_NOINTR;
    241 
    242 	/* Initialize other non-zero congestion variables */
    243 	nmp->nm_window = 2;			/* Initial send window */
    244 	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
    245 	nmp->nm_rttvar = nmp->nm_rto << 1;
    246 	nmp->nm_sent = 0;
    247 	nmp->nm_currexmit = 0;
    248 	return (0);
    249 
    250 bad:
    251 	nfs_disconnect(nmp);
    252 	return (error);
    253 }
    254 
    255 /*
    256  * Reconnect routine:
    257  * Called when a connection is broken on a reliable protocol.
    258  * - clean up the old socket
    259  * - nfs_connect() again
    260  * - set R_MUSTRESEND for all outstanding requests on mount point
    261  * If this fails the mount point is DEAD!
    262  * nb: Must be called with the nfs_solock() set on the mount point.
    263  */
    264 nfs_reconnect(rep, nmp)
    265 	register struct nfsreq *rep;
    266 	register struct nfsmount *nmp;
    267 {
    268 	register struct nfsreq *rp;
    269 	int error;
    270 
    271 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    272 	    "trying reconnect");
    273 	while (error = nfs_connect(nmp)) {
    274 #ifdef lint
    275 		error = error;
    276 #endif /* lint */
    277 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
    278 			return (EINTR);
    279 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
    280 	}
    281 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    282 	    "reconnected");
    283 
    284 	/*
    285 	 * Loop through outstanding request list and fix up all requests
    286 	 * on old socket.
    287 	 */
    288 	rp = nfsreqh.r_next;
    289 	while (rp != &nfsreqh) {
    290 		if (rp->r_nmp == nmp)
    291 			rp->r_flags |= R_MUSTRESEND;
    292 		rp = rp->r_next;
    293 	}
    294 	return (0);
    295 }
    296 
    297 /*
    298  * NFS disconnect. Clean up and unlink.
    299  */
    300 void
    301 nfs_disconnect(nmp)
    302 	register struct nfsmount *nmp;
    303 {
    304 	register struct socket *so;
    305 
    306 	if (nmp->nm_so) {
    307 		so = nmp->nm_so;
    308 		nmp->nm_so = (struct socket *)0;
    309 		soshutdown(so, 2);
    310 		soclose(so);
    311 	}
    312 }
    313 
    314 /*
    315  * This is the nfs send routine. For connection based socket types, it
    316  * must be called with an nfs_solock() on the socket.
    317  * "rep == NULL" indicates that it has been called from a server.
    318  */
    319 nfs_send(so, nam, top, rep)
    320 	register struct socket *so;
    321 	struct mbuf *nam;
    322 	register struct mbuf *top;
    323 	struct nfsreq *rep;
    324 {
    325 	struct mbuf *sendnam;
    326 	int error, soflags;
    327 
    328 	if (rep) {
    329 		if (rep->r_flags & R_SOFTTERM) {
    330 			m_freem(top);
    331 			return (EINTR);
    332 		}
    333 		if (rep->r_nmp->nm_so == NULL &&
    334 		    (error = nfs_reconnect(rep, rep->r_nmp)))
    335 			return (error);
    336 		rep->r_flags &= ~R_MUSTRESEND;
    337 		so = rep->r_nmp->nm_so;
    338 		soflags = rep->r_nmp->nm_soflags;
    339 	} else
    340 		soflags = so->so_proto->pr_flags;
    341 	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
    342 		sendnam = (struct mbuf *)0;
    343 	else
    344 		sendnam = nam;
    345 
    346 	error = sosend(so, sendnam, (struct uio *)0, top,
    347 		(struct mbuf *)0, 0);
    348 	if (error == EWOULDBLOCK && rep) {
    349 		if (rep->r_flags & R_SOFTTERM)
    350 			error = EINTR;
    351 		else {
    352 			rep->r_flags |= R_MUSTRESEND;
    353 			error = 0;
    354 		}
    355 	}
    356 	/*
    357 	 * Ignore socket errors??
    358 	 */
    359 	if (error && error != EINTR && error != ERESTART)
    360 		error = 0;
    361 	return (error);
    362 }
    363 
    364 /*
    365  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    366  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    367  * Mark and consolidate the data into a new mbuf list.
    368  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    369  *     small mbufs.
    370  * For SOCK_STREAM we must be very careful to read an entire record once
    371  * we have read any of it, even if the system call has been interrupted.
    372  */
    373 nfs_receive(so, aname, mp, rep)
    374 	register struct socket *so;
    375 	struct mbuf **aname;
    376 	struct mbuf **mp;
    377 	register struct nfsreq *rep;
    378 {
    379 	struct uio auio;
    380 	struct iovec aio;
    381 	register struct mbuf *m;
    382 	struct mbuf *m2, *mnew, **mbp;
    383 	caddr_t fcp, tcp;
    384 	u_long len;
    385 	struct mbuf **getnam;
    386 	int error, siz, mlen, soflags, rcvflg;
    387 
    388 	/*
    389 	 * Set up arguments for soreceive()
    390 	 */
    391 	*mp = (struct mbuf *)0;
    392 	*aname = (struct mbuf *)0;
    393 	if (rep)
    394 		soflags = rep->r_nmp->nm_soflags;
    395 	else
    396 		soflags = so->so_proto->pr_flags;
    397 
    398 	/*
    399 	 * For reliable protocols, lock against other senders/receivers
    400 	 * in case a reconnect is necessary.
    401 	 * For SOCK_STREAM, first get the Record Mark to find out how much
    402 	 * more there is to get.
    403 	 * We must lock the socket against other receivers
    404 	 * until we have an entire rpc request/reply.
    405 	 */
    406 	if (soflags & PR_CONNREQUIRED) {
    407 tryagain:
    408 		/*
    409 		 * Check for fatal errors and resending request.
    410 		 */
    411 		if (rep) {
    412 			/*
    413 			 * Ugh: If a reconnect attempt just happened, nm_so
    414 			 * would have changed. NULL indicates a failed
    415 			 * attempt that has essentially shut down this
    416 			 * mount point.
    417 			 */
    418 			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
    419 				(rep->r_flags & R_SOFTTERM))
    420 				return (EINTR);
    421 			while (rep->r_flags & R_MUSTRESEND) {
    422 				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
    423 				nfsstats.rpcretries++;
    424 				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
    425 					rep))
    426 					goto errout;
    427 			}
    428 		}
    429 		if ((soflags & PR_ATOMIC) == 0) {
    430 			aio.iov_base = (caddr_t) &len;
    431 			aio.iov_len = sizeof(u_long);
    432 			auio.uio_iov = &aio;
    433 			auio.uio_iovcnt = 1;
    434 			auio.uio_segflg = UIO_SYSSPACE;
    435 			auio.uio_rw = UIO_READ;
    436 			auio.uio_procp = (struct proc *)0;
    437 			auio.uio_offset = 0;
    438 			auio.uio_resid = sizeof(u_long);
    439 			do {
    440 			    rcvflg = MSG_WAITALL;
    441 			    error = soreceive(so, (struct mbuf **)0, &auio,
    442 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
    443 			    if (error == EWOULDBLOCK && rep) {
    444 				if (rep->r_flags & R_SOFTTERM)
    445 					return (EINTR);
    446 				if (rep->r_flags & R_MUSTRESEND)
    447 					goto tryagain;
    448 			    }
    449 			} while (error == EWOULDBLOCK);
    450 			if (!error && auio.uio_resid > 0) {
    451 			    if (rep)
    452 				log(LOG_INFO,
    453 				   "short receive (%d/%d) from nfs server %s\n",
    454 				   sizeof(u_long) - auio.uio_resid,
    455 				   sizeof(u_long),
    456 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    457 			    error = EPIPE;
    458 			}
    459 			if (error)
    460 				goto errout;
    461 			len = ntohl(len) & ~0x80000000;
    462 			/*
    463 			 * This is SERIOUS! We are out of sync with the sender
    464 			 * and forcing a disconnect/reconnect is all I can do.
    465 			 */
    466 			if (len > NFS_MAXPACKET) {
    467 			    if (rep)
    468 				log(LOG_ERR, "%s (%d) from nfs server %s\n",
    469 				    "impossible packet length",
    470 				    len,
    471 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    472 			    error = EFBIG;
    473 			    goto errout;
    474 			}
    475 			auio.uio_resid = len;
    476 			do {
    477 			    rcvflg = MSG_WAITALL;
    478 			    error =  soreceive(so, (struct mbuf **)0,
    479 				&auio, mp, (struct mbuf **)0, &rcvflg);
    480 			} while (error == EWOULDBLOCK || error == EINTR ||
    481 				 error == ERESTART);
    482 			if (!error && auio.uio_resid > 0) {
    483 			    if (rep)
    484 				log(LOG_INFO,
    485 				   "short receive (%d/%d) from nfs server %s\n",
    486 				   len - auio.uio_resid, len,
    487 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    488 			    error = EPIPE;
    489 			}
    490 		} else {
    491 			auio.uio_resid = len = 1000000;	/* Anything Big */
    492 			do {
    493 			    rcvflg = 0;
    494 			    error =  soreceive(so, (struct mbuf **)0,
    495 				&auio, mp, (struct mbuf **)0, &rcvflg);
    496 			    if (error == EWOULDBLOCK && rep) {
    497 				if (rep->r_flags & R_SOFTTERM)
    498 					return (EINTR);
    499 				if (rep->r_flags & R_MUSTRESEND)
    500 					goto tryagain;
    501 			    }
    502 			} while (error == EWOULDBLOCK);
    503 			if (!error && *mp == NULL)
    504 				error = EPIPE;
    505 			len -= auio.uio_resid;
    506 		}
    507 errout:
    508 		if (error && rep && error != EINTR && error != ERESTART) {
    509 			m_freem(*mp);
    510 			*mp = (struct mbuf *)0;
    511 			if (error != EPIPE && rep)
    512 				log(LOG_INFO,
    513 				    "receive error %d from nfs server %s\n",
    514 				    error,
    515 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    516 			nfs_disconnect(rep->r_nmp);
    517 			error = nfs_reconnect(rep, rep->r_nmp);
    518 			if (!error)
    519 				goto tryagain;
    520 		}
    521 	} else {
    522 		if (so->so_state & SS_ISCONNECTED)
    523 			getnam = (struct mbuf **)0;
    524 		else
    525 			getnam = aname;
    526 		auio.uio_resid = len = 1000000;
    527 		do {
    528 			rcvflg = 0;
    529 			error =  soreceive(so, getnam, &auio, mp,
    530 				(struct mbuf **)0, &rcvflg);
    531 			if (error == EWOULDBLOCK && rep &&
    532 			    (rep->r_flags & R_SOFTTERM))
    533 				return (EINTR);
    534 		} while (error == EWOULDBLOCK);
    535 		len -= auio.uio_resid;
    536 	}
    537 	if (error) {
    538 		m_freem(*mp);
    539 		*mp = (struct mbuf *)0;
    540 	}
    541 	/*
    542 	 * Search for any mbufs that are not a multiple of 4 bytes long.
    543 	 * These could cause pointer alignment problems, so copy them to
    544 	 * well aligned mbufs.
    545 	 */
    546 	m = *mp;
    547 	mbp = mp;
    548 	while (m) {
    549 		/*
    550 		 * All this for something that may never happen.
    551 		 */
    552 		if (m->m_next && (m->m_len & 0x3)) {
    553 			printf("nfs_rcv odd length!\n");
    554 			mlen = 0;
    555 			while (m) {
    556 				fcp = mtod(m, caddr_t);
    557 				while (m->m_len > 0) {
    558 					if (mlen == 0) {
    559 						MGET(m2, M_WAIT, MT_DATA);
    560 						if (len >= MINCLSIZE)
    561 							MCLGET(m2, M_WAIT);
    562 						m2->m_len = 0;
    563 						mlen = M_TRAILINGSPACE(m2);
    564 						tcp = mtod(m2, caddr_t);
    565 						*mbp = m2;
    566 						mbp = &m2->m_next;
    567 					}
    568 					siz = MIN(mlen, m->m_len);
    569 					bcopy(fcp, tcp, siz);
    570 					m2->m_len += siz;
    571 					mlen -= siz;
    572 					len -= siz;
    573 					tcp += siz;
    574 					m->m_len -= siz;
    575 					fcp += siz;
    576 				}
    577 				MFREE(m, mnew);
    578 				m = mnew;
    579 			}
    580 			break;
    581 		}
    582 		len -= m->m_len;
    583 		mbp = &m->m_next;
    584 		m = m->m_next;
    585 	}
    586 	return (error);
    587 }
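        /*
         * An illustrative sketch (not part of the original code) of the Sun
         * RPC record mark framing that nfs_receive() strips above and
         * nfs_request() prepends below for SOCK_STREAM: the high bit of the
         * 32-bit mark flags the final fragment and the low 31 bits carry
         * the fragment length.
         */
        #if 0
        static void
        nfs_recmark_example(len)
        	u_long len;
        {
        	u_long mark;

        	mark = htonl(0x80000000 | len);		/* encode: final fragment bit + length */
        	len = ntohl(mark) & ~0x80000000;	/* decode the length */
        	/* (ntohl(mark) & 0x80000000) != 0 tests the final-fragment bit */
        }
        #endif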
    588 
    589 /*
    590  * Implement receipt of reply on a socket.
    591  * We must search through the list of received datagrams matching them
    592  * with outstanding requests using the xid, until ours is found.
    593  */
    594 /* ARGSUSED */
    595 nfs_reply(nmp, myrep)
    596 	struct nfsmount *nmp;
    597 	struct nfsreq *myrep;
    598 {
    599 	register struct mbuf *m;
    600 	register struct nfsreq *rep;
    601 	register int error = 0;
    602 	u_long rxid;
    603 	struct mbuf *mp, *nam;
    604 	char *cp;
    605 	int cnt, xfer;
    606 
    607 	/*
    608 	 * Loop around until we get our own reply
    609 	 */
    610 	for (;;) {
    611 		/*
    612 		 * Lock against other receivers so that I don't get stuck in
    613 		 * sbwait() after someone else has received my reply for me.
    614 		 * Also necessary for connection based protocols to avoid
    615 		 * race conditions during a reconnect.
    616 		 */
    617 		nfs_solock(&nmp->nm_flag);
    618 		/* Already received, bye bye */
    619 		if (myrep->r_mrep != NULL) {
    620 			nfs_sounlock(&nmp->nm_flag);
    621 			return (0);
    622 		}
    623 		/*
    624 		 * Get the next Rpc reply off the socket
    625 		 */
    626 		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
    627 			nfs_sounlock(&nmp->nm_flag);
    628 
    629 			/*
    630 			 * Ignore routing errors on connectionless protocols??
    631 			 */
    632 			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
    633 				nmp->nm_so->so_error = 0;
    634 				continue;
    635 			}
    636 
    637 			/*
    638 			 * Otherwise cleanup and return a fatal error.
    639 			 */
    640 			if (myrep->r_flags & R_TIMING) {
    641 				myrep->r_flags &= ~R_TIMING;
    642 				nmp->nm_rtt = -1;
    643 			}
    644 			if (myrep->r_flags & R_SENT) {
    645 				myrep->r_flags &= ~R_SENT;
    646 				nmp->nm_sent--;
    647 			}
    648 			return (error);
    649 		}
    650 
    651 		/*
    652 		 * Get the xid and check that it is an rpc reply
    653 		 */
    654 		m = mp;
    655 		while (m && m->m_len == 0)
    656 			m = m->m_next;
    657 		if (m == NULL) {
    658 			nfsstats.rpcinvalid++;
    659 			m_freem(mp);
    660 			nfs_sounlock(&nmp->nm_flag);
    661 			continue;
    662 		}
    663 		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
    664 		/*
    665 		 * Loop through the request list to match up the reply
    666 		 * Iff no match, just drop the datagram
    667 		 */
    668 		m = mp;
    669 		rep = nfsreqh.r_next;
    670 		while (rep != &nfsreqh) {
    671 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
    672 				/* Found it.. */
    673 				rep->r_mrep = m;
    674 				/*
    675 				 * Update timing
    676 				 */
    677 				if (rep->r_flags & R_TIMING) {
    678 					nfs_updatetimer(rep->r_nmp);
    679 					rep->r_flags &= ~R_TIMING;
    680 					rep->r_nmp->nm_rtt = -1;
    681 				}
    682 				if (rep->r_flags & R_SENT) {
    683 					rep->r_flags &= ~R_SENT;
    684 					rep->r_nmp->nm_sent--;
    685 				}
    686 				break;
    687 			}
    688 			rep = rep->r_next;
    689 		}
    690 		nfs_sounlock(&nmp->nm_flag);
    691 		if (nam)
    692 			m_freem(nam);
    693 		/*
    694 		 * If not matched to a request, drop it.
    695 		 * If it's mine, get out.
    696 		 */
    697 		if (rep == &nfsreqh) {
    698 			nfsstats.rpcunexpected++;
    699 			m_freem(m);
    700 		} else if (rep == myrep)
    701 			return (0);
    702 	}
    703 }
    704 
    705 /*
    706  * nfs_request - goes something like this
    707  *	- fill in request struct
    708  *	- links it into list
    709  *	- calls nfs_send() for first transmit
    710  *	- calls nfs_receive() to get reply
    711  *	- break down rpc header and return with nfs reply pointed to
    712  *	  by mrep or error
    713  * nb: always frees up mreq mbuf list
    714  */
    715 nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
    716 	struct vnode *vp;
    717 	struct mbuf *mreq;
    718 	u_long xid;
    719 	int procnum;
    720 	struct proc *procp;
    721 	int tryhard;
    722 	struct mount *mp;
    723 	struct mbuf **mrp;
    724 	struct mbuf **mdp;
    725 	caddr_t *dposp;
    726 {
    727 	register struct mbuf *m, *mrep;
    728 	register struct nfsreq *rep;
    729 	register u_long *tl;
    730 	register int len;
    731 	struct nfsmount *nmp;
    732 	struct mbuf *md;
    733 	struct nfsreq *reph;
    734 	caddr_t dpos;
    735 	char *cp2;
    736 	int t1;
    737 	int s, compressed;
    738 	int error = 0;
    739 
    740 	nmp = VFSTONFS(mp);
    741 	m = mreq;
    742 	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
    743 	rep->r_xid = xid;
    744 	rep->r_nmp = nmp;
    745 	rep->r_vp = vp;
    746 	rep->r_procp = procp;
    747 	if ((nmp->nm_flag & NFSMNT_SOFT) ||
    748 	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
    749 		rep->r_retry = nmp->nm_retry;
    750 	else
    751 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
    752 	rep->r_flags = rep->r_rexmit = 0;
    753 	/*
    754 	 * Three cases:
    755 	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
    756 	 * - idempotent requests on SOCK_DGRAM use 0
    757 	 * - Reliable transports, NFS_RELIABLETIMEO
    758 	 *   Timeouts are still done on reliable transports to ensure detection
    759 	 *   of excessive connection delay.
    760 	 */
    761 	if (nmp->nm_sotype != SOCK_DGRAM)
    762 		rep->r_timerinit = -NFS_RELIABLETIMEO;
    763 	else if (nonidempotent[procnum])
    764 		rep->r_timerinit = -NFS_MINIDEMTIMEO;
    765 	else
    766 		rep->r_timerinit = 0;
    767 	rep->r_timer = rep->r_timerinit;
    768 	rep->r_mrep = NULL;
    769 	len = 0;
    770 	while (m) {
    771 		len += m->m_len;
    772 		m = m->m_next;
    773 	}
    774 	mreq->m_pkthdr.len = len;
    775 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
    776 	compressed = 0;
    777 	m = mreq;
    778 	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
    779 		mreq = nfs_compress(mreq);
    780 		if (mreq != m) {
    781 			len = mreq->m_pkthdr.len;
    782 			compressed++;
    783 		}
    784 	}
    785 	/*
    786 	 * For non-atomic protocols, insert a Sun RPC Record Mark.
    787 	 */
    788 	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
    789 		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
    790 		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
    791 	}
    792 	rep->r_mreq = mreq;
    793 
    794 	/*
    795 	 * Do the client side RPC.
    796 	 */
    797 	nfsstats.rpcrequests++;
    798 	/*
    799 	 * Chain request into list of outstanding requests. Be sure
    800 	 * to put it LAST so timer finds oldest requests first.
    801 	 */
    802 	s = splnet();
    803 	reph = &nfsreqh;
    804 	reph->r_prev->r_next = rep;
    805 	rep->r_prev = reph->r_prev;
    806 	reph->r_prev = rep;
    807 	rep->r_next = reph;
    808 	/*
    809 	 * If backing off another request or avoiding congestion, don't
    810 	 * send this one now but let timer do it. If not timing a request,
    811 	 * do it now.
    812 	 */
    813 	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
    814 	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
    815 		nmp->nm_sent++;
    816 		rep->r_flags |= R_SENT;
    817 		if (nmp->nm_rtt == -1) {
    818 			nmp->nm_rtt = 0;
    819 			rep->r_flags |= R_TIMING;
    820 		}
    821 		splx(s);
    822 		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
    823 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    824 			nfs_solock(&nmp->nm_flag);
    825 		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
    826 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    827 			nfs_sounlock(&nmp->nm_flag);
    828 		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
    829 			nmp->nm_so->so_error = error = 0;
    830 	} else
    831 		splx(s);
    832 
    833 	/*
    834 	 * Wait for the reply from our send or the timer's.
    835 	 */
    836 	if (!error)
    837 		error = nfs_reply(nmp, rep);
    838 
    839 	/*
    840 	 * RPC done, unlink the request.
    841 	 */
    842 	s = splnet();
    843 	rep->r_prev->r_next = rep->r_next;
    844 	rep->r_next->r_prev = rep->r_prev;
    845 	splx(s);
    846 
    847 	/*
    848 	 * If there was a successful reply and a tprintf msg has been
    849 	 * printed, tprintf a response saying the server is alive again.
    850 	 */
    851 	if (!error && (rep->r_flags & R_TPRINTFMSG))
    852 		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    853 		    "is alive again");
    854 	m_freem(rep->r_mreq);
    855 	mrep = rep->r_mrep;
    856 	FREE((caddr_t)rep, M_NFSREQ);
    857 	if (error)
    858 		return (error);
    859 
    860 	if (compressed)
    861 		mrep = nfs_uncompress(mrep);
    862 	md = mrep;
    863 	/*
    864 	 * break down the rpc header and check if ok
    865 	 */
    866 	dpos = mtod(md, caddr_t);
    867 	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
    868 	tl += 2;
    869 	if (*tl++ == rpc_msgdenied) {
    870 		if (*tl == rpc_mismatch)
    871 			error = EOPNOTSUPP;
    872 		else
    873 			error = EACCES;
    874 		m_freem(mrep);
    875 		return (error);
    876 	}
    877 	/*
    878 	 * skip over the auth_verf, someday we may want to cache auth_short's
    879 	 * for nfs_reqhead(), but for now just dump it
    880 	 */
    881 	if (*++tl != 0) {
    882 		len = nfsm_rndup(fxdr_unsigned(long, *tl));
    883 		nfsm_adv(len);
    884 	}
    885 	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    886 	/* 0 == ok */
    887 	if (*tl == 0) {
    888 		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    889 		if (*tl != 0) {
    890 			error = fxdr_unsigned(int, *tl);
    891 			m_freem(mrep);
    892 			return (error);
    893 		}
    894 		*mrp = mrep;
    895 		*mdp = md;
    896 		*dposp = dpos;
    897 		return (0);
    898 	}
    899 	m_freem(mrep);
    900 	return (EPROTONOSUPPORT);
    901 nfsmout:
    902 	return (error);
    903 }
    904 
    905 /*
    906  * Get a request for the server main loop
    907  * - receive a request via nfs_receive()
    908  * - verify it
    909  * - fill in the cred struct.
    910  */
    911 nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
    912 	msk, mtch, wascomp, repstat)
    913 	struct socket *so;
    914 	u_long prog;
    915 	u_long vers;
    916 	int maxproc;
    917 	struct mbuf **nam;
    918 	struct mbuf **mrp;
    919 	struct mbuf **mdp;
    920 	caddr_t *dposp;
    921 	u_long *retxid;
    922 	u_long *procnum;
    923 	register struct ucred *cr;
    924 	struct mbuf *msk, *mtch;
    925 	int *wascomp, *repstat;
    926 {
    927 	register int i;
    928 	register u_long *tl;
    929 	register long t1;
    930 	caddr_t dpos, cp2;
    931 	int error = 0;
    932 	struct mbuf *mrep, *md;
    933 	int len;
    934 
    935 	*repstat = 0;
    936 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    937 		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    938 	} else {
    939 		mrep = (struct mbuf *)0;
    940 		do {
    941 			if (mrep) {
    942 				m_freem(*nam);
    943 				m_freem(mrep);
    944 			}
    945 			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    946 		} while (!error && nfs_badnam(*nam, msk, mtch));
    947 	}
    948 	if (error)
    949 		return (error);
    950 	md = mrep;
    951 	mrep = nfs_uncompress(mrep);
    952 	if (mrep != md) {
    953 		*wascomp = 1;
    954 		md = mrep;
    955 	} else
    956 		*wascomp = 0;
    957 	dpos = mtod(mrep, caddr_t);
    958 	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
    959 	*retxid = fxdr_unsigned(u_long, *tl++);
    960 	if (*tl++ != rpc_call || *tl++ != rpc_vers) {
    961 		*mrp = mrep;
    962 		*procnum = NFSPROC_NOOP;
    963 		*repstat = ERPCMISMATCH;
    964 		return (0);
    965 	}
    966 	if (*tl++ != prog) {
    967 		*mrp = mrep;
    968 		*procnum = NFSPROC_NOOP;
    969 		*repstat = EPROGUNAVAIL;
    970 		return (0);
    971 	}
    972 	if (*tl++ != vers) {
    973 		*mrp = mrep;
    974 		*procnum = NFSPROC_NOOP;
    975 		*repstat = EPROGMISMATCH;
    976 		return (0);
    977 	}
    978 	*procnum = fxdr_unsigned(u_long, *tl++);
    979 	if (*procnum == NFSPROC_NULL) {
    980 		*mrp = mrep;
    981 		return (0);
    982 	}
    983 	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
    984 		*mrp = mrep;
    985 		*procnum = NFSPROC_NOOP;
    986 		*repstat = EPROCUNAVAIL;
    987 		return (0);
    988 	}
    989 	len = fxdr_unsigned(int, *tl++);
    990 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
    991 		m_freem(mrep);
    992 		return (EBADRPC);
    993 	}
    994 	len = fxdr_unsigned(int, *++tl);
    995 	if (len < 0 || len > NFS_MAXNAMLEN) {
    996 		m_freem(mrep);
    997 		return (EBADRPC);
    998 	}
    999 	nfsm_adv(nfsm_rndup(len));
   1000 	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
   1001 	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
   1002 	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
   1003 	len = fxdr_unsigned(int, *tl);
   1004 	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
   1005 		m_freem(mrep);
   1006 		return (EBADRPC);
   1007 	}
   1008 	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
   1009 	for (i = 1; i <= len; i++)
   1010 		if (i < NGROUPS)
   1011 			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
   1012 		else
   1013 			tl++;
   1014 	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
   1015 	/*
   1016 	 * Do we have any use for the verifier.
   1017 	 * According to the "Remote Procedure Call Protocol Spec." it
   1018 	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
   1019 	 * For now, just skip over it
   1020 	 */
   1021 	len = fxdr_unsigned(int, *++tl);
   1022 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
   1023 		m_freem(mrep);
   1024 		return (EBADRPC);
   1025 	}
   1026 	if (len > 0)
   1027 		nfsm_adv(nfsm_rndup(len));
   1028 	*mrp = mrep;
   1029 	*mdp = md;
   1030 	*dposp = dpos;
   1031 	return (0);
   1032 nfsmout:
   1033 	return (error);
   1034 }
   1035 
   1036 /*
   1037  * Generate the rpc reply header
   1038  * siz arg. is used to decide if adding a cluster is worthwhile
   1039  */
   1040 nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
   1041 	int siz;
   1042 	u_long retxid;
   1043 	int err;
   1044 	struct mbuf **mrq;
   1045 	struct mbuf **mbp;
   1046 	caddr_t *bposp;
   1047 {
   1048 	register u_long *tl;
   1049 	register long t1;
   1050 	caddr_t bpos;
   1051 	struct mbuf *mreq, *mb, *mb2;
   1052 
   1053 	NFSMGETHDR(mreq);
   1054 	mb = mreq;
   1055 	if ((siz+RPC_REPLYSIZ) > MHLEN)
   1056 		MCLGET(mreq, M_WAIT);
   1057 	tl = mtod(mreq, u_long *);
   1058 	mreq->m_len = 6*NFSX_UNSIGNED;
   1059 	bpos = ((caddr_t)tl)+mreq->m_len;
   1060 	*tl++ = txdr_unsigned(retxid);
   1061 	*tl++ = rpc_reply;
   1062 	if (err == ERPCMISMATCH) {
   1063 		*tl++ = rpc_msgdenied;
   1064 		*tl++ = rpc_mismatch;
   1065 		*tl++ = txdr_unsigned(2);
   1066 		*tl = txdr_unsigned(2);
   1067 	} else {
   1068 		*tl++ = rpc_msgaccepted;
   1069 		*tl++ = 0;
   1070 		*tl++ = 0;
   1071 		switch (err) {
   1072 		case EPROGUNAVAIL:
   1073 			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
   1074 			break;
   1075 		case EPROGMISMATCH:
   1076 			*tl = txdr_unsigned(RPC_PROGMISMATCH);
   1077 			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
   1078 			*tl++ = txdr_unsigned(2);
   1079 			*tl = txdr_unsigned(2);	/* someday 3 */
   1080 			break;
   1081 		case EPROCUNAVAIL:
   1082 			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
   1083 			break;
   1084 		default:
   1085 			*tl = 0;
   1086 			if (err != VNOVAL) {
   1087 				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
   1088 				*tl = txdr_unsigned(err);
   1089 			}
   1090 			break;
   1091 		};
   1092 	}
   1093 	*mrq = mreq;
   1094 	*mbp = mb;
   1095 	*bposp = bpos;
   1096 	if (err != 0 && err != VNOVAL)
   1097 		nfsstats.srvrpc_errs++;
   1098 	return (0);
   1099 }
   1100 
   1101 /*
   1102  * Nfs timer routine
   1103  * Scan the nfsreq list and retransmit any requests that have timed out
   1104  * To avoid retransmission attempts on STREAM sockets (in the future) make
   1105  * sure to set the r_retry field to 0 (implies nm_retry == 0).
   1106  */
   1107 void
   1108 nfs_timer()
   1109 {
   1110 	register struct nfsreq *rep;
   1111 	register struct mbuf *m;
   1112 	register struct socket *so;
   1113 	register struct nfsmount *nmp;
   1114 	int s, error;
   1115 
   1116 	s = splnet();
   1117 	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
   1118 		nmp = rep->r_nmp;
   1119 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
   1120 		    (so = nmp->nm_so) == NULL)
   1121 			continue;
   1122 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
   1123 			rep->r_flags |= R_SOFTTERM;
   1124 			continue;
   1125 		}
   1126 		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
   1127 			nmp->nm_rtt++;
   1128 		/* If not timed out */
   1129 		if (++rep->r_timer < nmp->nm_rto)
   1130 			continue;
   1131 		/* Do backoff and save new timeout in mount */
   1132 		if (rep->r_flags & R_TIMING) {
   1133 			nfs_backofftimer(nmp);
   1134 			rep->r_flags &= ~R_TIMING;
   1135 			nmp->nm_rtt = -1;
   1136 		}
   1137 		if (rep->r_flags & R_SENT) {
   1138 			rep->r_flags &= ~R_SENT;
   1139 			nmp->nm_sent--;
   1140 		}
   1141 
   1142 		/*
   1143 		 * Check for too many retries on soft mount.
   1144 		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
   1145 		 */
   1146 		if (++rep->r_rexmit > NFS_MAXREXMIT)
   1147 			rep->r_rexmit = NFS_MAXREXMIT;
   1148 
   1149 		/*
   1150 		 * Check for server not responding
   1151 		 */
   1152 		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
   1153 		     rep->r_rexmit > NFS_FISHY) {
   1154 			nfs_msg(rep->r_procp,
   1155 			    nmp->nm_mountp->mnt_stat.f_mntfromname,
   1156 			    "not responding");
   1157 			rep->r_flags |= R_TPRINTFMSG;
   1158 		}
   1159 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
   1160 			nfsstats.rpctimeouts++;
   1161 			rep->r_flags |= R_SOFTTERM;
   1162 			continue;
   1163 		}
   1164 		if (nmp->nm_sotype != SOCK_DGRAM)
   1165 			continue;
   1166 
   1167 		/*
   1168 		 * If there is enough space and the window allows..
   1169 		 *	Resend it
   1170 		 */
   1171 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
   1172 		       nmp->nm_sent < nmp->nm_window &&
   1173 		       (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
   1174 			nfsstats.rpcretries++;
   1175 			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
   1176 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1177 			    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
   1178 			else
   1179 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1180 			    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
   1181 			if (error) {
   1182 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
   1183 					so->so_error = 0;
   1184 			} else {
   1185 				/*
   1186 				 * We need to time the request even though we
   1187 				 * are retransmitting.
   1188 				 */
   1189 				nmp->nm_rtt = 0;
   1190 				nmp->nm_sent++;
   1191 				rep->r_flags |= (R_SENT|R_TIMING);
   1192 				rep->r_timer = rep->r_timerinit;
   1193 			}
   1194 		}
   1195 	}
   1196 	splx(s);
   1197 	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
   1198 }
   1199 
   1200 /*
   1201  * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
   1202  * used here. The timer state is held in the nfsmount structure and
   1203  * a single request is used to clock the response. When successful
   1204  * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
   1205  * is done by nfs_backofftimer. We also log failure messages in these
   1206  * routines.
   1207  *
   1208  * Congestion variables are held in the nfshost structure which
   1209  * is referenced by nfsmounts and shared per-server. This separation
   1210  * makes it possible to do per-mount timing which allows varying disk
   1211  * access times to be dealt with, while preserving a network oriented
   1212  * congestion control scheme.
   1213  *
   1214  * The windowing implements the Jacobson/Karels slowstart algorithm
   1215  * with adjusted scaling factors. We start with one request, then send
   1216  * 4 more after each success until the ssthresh limit is reached, then
   1217  * we increment at a rate proportional to the window. On failure, we
   1218  * remember 3/4 the current window and clamp the send limit to 1. Note
   1219  * ICMP source quench is not reflected in so->so_error so we ignore that
   1220  * for now.
   1221  *
   1222  * NFS behaves much more like a transport protocol with these changes,
   1223  * shedding the teenage pedal-to-the-metal tendencies of "other"
   1224  * implementations.
   1225  *
   1226  * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
   1227  */
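        /*
         * A worked example of the window arithmetic below, with illustrative
         * numbers: starting from nfs_connect()'s initial window of 2 with
         * ssthresh at NFS_MAXWINDOW, each timed success adds 4 to the window
         * (2, 6, 10, ...) until ssthresh is reached; past that, nm_winext is
         * bumped per success and the window advances by one only once
         * winext * winext reaches the window.  A timeout while the window is
         * 16 records ssthresh = ((16 << 1) + 16) >> 2 = 12 (3/4 of 16) and
         * clamps the window back to 1.
         */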
   1228 
   1229 /*
   1230  * The TCP algorithm was not forgiving enough. Because the NFS server
   1231  * responds only after performing lookups/diskio/etc, we have to be
   1232  * more prepared to accept a spiky variance. The TCP algorithm is:
   1233  * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
   1234  */
   1235 #define NFS_RTO(nmp)	(((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
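        /*
         * A reading of the fixed point scaling in nfs_updatetimer() below:
         * nm_srtt is kept scaled by 8 and nm_rttvar by 4, so NFS_RTO works
         * out to roughly the smoothed rtt plus four mean deviations, where
         * the TCP formula above gives roughly the smoothed rtt plus two;
         * hence the note above that the TCP algorithm was not forgiving
         * enough.
         */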
   1236 
   1237 nfs_updatetimer(nmp)
   1238 	register struct nfsmount *nmp;
   1239 {
   1240 
   1241 	/* If retransmitted, clear and return */
   1242 	if (nmp->nm_rexmit || nmp->nm_currexmit) {
   1243 		nmp->nm_rexmit = nmp->nm_currexmit = 0;
   1244 		return;
   1245 	}
   1246 	/* If have a measurement, do smoothing */
   1247 	if (nmp->nm_srtt) {
   1248 		register short delta;
   1249 		delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
   1250 		if ((nmp->nm_srtt += delta) <= 0)
   1251 			nmp->nm_srtt = 1;
   1252 		if (delta < 0)
   1253 			delta = -delta;
   1254 		delta -= (nmp->nm_rttvar >> 2);
   1255 		if ((nmp->nm_rttvar += delta) <= 0)
   1256 			nmp->nm_rttvar = 1;
   1257 	/* Else initialize */
   1258 	} else {
   1259 		nmp->nm_rttvar = nmp->nm_rtt << 1;
   1260 		if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
   1261 		nmp->nm_srtt = nmp->nm_rttvar << 2;
   1262 	}
   1263 	/* Compute new Retransmission TimeOut and clip */
   1264 	nmp->nm_rto = NFS_RTO(nmp);
   1265 	if (nmp->nm_rto < NFS_MINTIMEO)
   1266 		nmp->nm_rto = NFS_MINTIMEO;
   1267 	else if (nmp->nm_rto > NFS_MAXTIMEO)
   1268 		nmp->nm_rto = NFS_MAXTIMEO;
   1269 
   1270 	/* Update window estimate */
   1271 	if (nmp->nm_window < nmp->nm_ssthresh)	/* quickly */
   1272 		nmp->nm_window += 4;
   1273 	else {						/* slowly */
   1274 		register long incr = ++nmp->nm_winext;
   1275 		incr = (incr * incr) / nmp->nm_window;
   1276 		if (incr > 0) {
   1277 			nmp->nm_winext = 0;
   1278 			++nmp->nm_window;
   1279 		}
   1280 	}
   1281 	if (nmp->nm_window > NFS_MAXWINDOW)
   1282 		nmp->nm_window = NFS_MAXWINDOW;
   1283 }
   1284 
   1285 nfs_backofftimer(nmp)
   1286 	register struct nfsmount *nmp;
   1287 {
   1288 	register unsigned long newrto;
   1289 
   1290 	/* Clip shift count */
   1291 	if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
   1292 		nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
   1293 	/* Back off RTO exponentially */
   1294 	newrto = NFS_RTO(nmp);
   1295 	newrto <<= (nmp->nm_rexmit - 1);
   1296 	if (newrto == 0 || newrto > NFS_MAXTIMEO)
   1297 		newrto = NFS_MAXTIMEO;
   1298 	nmp->nm_rto = newrto;
   1299 
   1300 	/* If too many retries, message, assume a bogus RTT and re-measure */
   1301 	if (nmp->nm_currexmit < nmp->nm_rexmit) {
   1302 		nmp->nm_currexmit = nmp->nm_rexmit;
   1303 		if (nmp->nm_currexmit >= nfsrexmtthresh) {
   1304 			if (nmp->nm_currexmit == nfsrexmtthresh) {
   1305 				nmp->nm_rttvar += (nmp->nm_srtt >> 2);
   1306 				nmp->nm_srtt = 0;
   1307 			}
   1308 		}
   1309 	}
   1310 	/* Close down window but remember this point (3/4 current) for later */
   1311 	nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
   1312 	nmp->nm_window = 1;
   1313 	nmp->nm_winext = 0;
   1314 }
   1315 
   1316 /*
   1317  * Test for a termination signal pending on procp.
   1318  * This is used for NFSMNT_INT mounts.
   1319  */
   1320 nfs_sigintr(p)
   1321 	register struct proc *p;
   1322 {
   1323 	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
   1324 	    NFSINT_SIGMASK))
   1325 		return (1);
   1326 	else
   1327 		return (0);
   1328 }
   1329 
   1330 nfs_msg(p, server, msg)
   1331 	struct proc *p;
   1332 	char *server, *msg;
   1333 {
   1334 	tpr_t tpr;
   1335 
   1336 	if (p)
   1337 		tpr = tprintf_open(p);
   1338 	else
   1339 		tpr = NULL;
   1340 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1341 	tprintf_close(tpr);
   1342 }
   1343 
   1344 /*
   1345  * Lock a socket against others.
   1346  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1347  * and also to avoid race conditions between the processes with nfs requests
   1348  * in progress when a reconnect is necessary.
   1349  */
   1350 nfs_solock(flagp)
   1351 	register int *flagp;
   1352 {
   1353 
   1354 	while (*flagp & NFSMNT_SCKLOCK) {
   1355 		*flagp |= NFSMNT_WANTSCK;
   1356 		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
   1357 	}
   1358 	*flagp |= NFSMNT_SCKLOCK;
   1359 }
   1360 
   1361 /*
   1362  * Unlock the stream socket for others.
   1363  */
   1364 nfs_sounlock(flagp)
   1365 	register int *flagp;
   1366 {
   1367 
   1368 	if ((*flagp & NFSMNT_SCKLOCK) == 0)
   1369 		panic("nfs sounlock");
   1370 	*flagp &= ~NFSMNT_SCKLOCK;
   1371 	if (*flagp & NFSMNT_WANTSCK) {
   1372 		*flagp &= ~NFSMNT_WANTSCK;
   1373 		wakeup((caddr_t)flagp);
   1374 	}
   1375 }
   1376 
   1377 /*
   1378  * This function compares two net addresses by family and returns TRUE
   1379  * if they are the same.
   1380  * If there is any doubt, return FALSE.
   1381  */
   1382 nfs_netaddr_match(nam1, nam2)
   1383 	struct mbuf *nam1, *nam2;
   1384 {
   1385 	register struct sockaddr *saddr1, *saddr2;
   1386 
   1387 	saddr1 = mtod(nam1, struct sockaddr *);
   1388 	saddr2 = mtod(nam2, struct sockaddr *);
   1389 	if (saddr1->sa_family != saddr2->sa_family)
   1390 		return (0);
   1391 
   1392 	/*
   1393 	 * Must do each address family separately since unused fields
   1394 	 * are undefined values and not always zeroed.
   1395 	 */
   1396 	switch (saddr1->sa_family) {
   1397 	case AF_INET:
   1398 		if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
   1399 		    ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
   1400 			return (1);
   1401 		break;
   1402 	default:
   1403 		break;
   1404 	};
   1405 	return (0);
   1406 }
   1407 
   1408 /*
   1409  * Check the hostname fields for nfsd's mask and match fields.
   1410  * By address family:
   1411  * - Bitwise AND the mask with the host address field
   1412  * - Compare for == with match
   1413  * return TRUE if not equal
   1414  */
   1415 nfs_badnam(nam, msk, mtch)
   1416 	register struct mbuf *nam, *msk, *mtch;
   1417 {
   1418 	switch (mtod(nam, struct sockaddr *)->sa_family) {
   1419 	case AF_INET:
   1420 		return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
   1421 			 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
   1422 			 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
   1423 	default:
   1424 		printf("nfs_badnam, unknown sa_family\n");
   1425 		return (0);
   1426 	};
   1427 }
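        /*
         * For example, with illustrative addresses only: if msk holds
         * 255.255.255.0 and mtch holds 192.168.1.0, nfs_badnam() returns
         * FALSE for any nam inside 192.168.1/24 and TRUE for everything
         * else, so nfs_getreq() keeps re-receiving until a request arrives
         * from an acceptable address.
         */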
   1428