Home | History | Annotate | Line # | Download | only in nfs
nfs_socket.c revision 1.9
      1 /*
      2  * Copyright (c) 1989, 1991 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Rick Macklem at The University of Guelph.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	from: @(#)nfs_socket.c	7.23 (Berkeley) 4/20/91
     37  *	$Id: nfs_socket.c,v 1.9 1993/12/18 00:45:14 mycroft Exp $
     38  */
     39 
     40 /*
     41  * Socket operations for use by nfs
     42  */
     43 
     44 #include <sys/param.h>
     45 #include <sys/systm.h>
     46 #include <sys/proc.h>
     47 #include <sys/mount.h>
     48 #include <sys/kernel.h>
     49 #include <sys/malloc.h>
     50 #include <sys/mbuf.h>
     51 #include <sys/namei.h>
     52 #include <sys/vnode.h>
     53 #include <sys/domain.h>
     54 #include <sys/protosw.h>
     55 #include <sys/socket.h>
     56 #include <sys/socketvar.h>
     57 #include <sys/syslog.h>
     58 #include <sys/tprintf.h>
     59 
     60 #include <netinet/in.h>
     61 #include <netinet/tcp.h>
     62 
     63 #include <nfs/rpcv2.h>
     64 #include <nfs/nfsv2.h>
     65 #include <nfs/nfs.h>
     66 #include <nfs/xdr_subs.h>
     67 #include <nfs/nfsm_subs.h>
     68 #include <nfs/nfsmount.h>
     69 
     70 #define	TRUE	1
     71 #define	FALSE	0
     72 
     73 /*
     74  * External data, mostly RPC constants in XDR form
     75  */
     76 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
     77 	rpc_msgaccepted, rpc_call;
     78 extern u_long nfs_prog, nfs_vers;
     79 /* Maybe these should be bits in a u_long ?? */
     80 /*
     81  * Static array that defines which nfs rpc's are nonidempotent
     82  */
     83 int nonidempotent[NFS_NPROCS] = {
     84 	FALSE,
     85 	FALSE,
     86 	TRUE,
     87 	FALSE,
     88 	FALSE,
     89 	FALSE,
     90 	FALSE,
     91 	FALSE,
     92 	TRUE,
     93 	TRUE,
     94 	TRUE,
     95 	TRUE,
     96 	TRUE,
     97 	TRUE,
     98 	TRUE,
     99 	TRUE,
    100 	FALSE,
    101 	FALSE,
    102 };
/*
 * Which NFSv2 requests are run through nfs_compress() before
 * transmission when the mount has NFSMNT_COMPRESS set (see
 * nfs_request()).  Indexed by procedure number, names per RFC 1094.
 */
static int compressrequest[NFS_NPROCS] = {
	FALSE,	/* NULL */
	TRUE,	/* GETATTR */
	TRUE,	/* SETATTR */
	FALSE,	/* ROOT */
	TRUE,	/* LOOKUP */
	TRUE,	/* READLINK */
	TRUE,	/* READ */
	FALSE,	/* WRITECACHE */
	FALSE,	/* WRITE */
	TRUE,	/* CREATE */
	TRUE,	/* REMOVE */
	TRUE,	/* RENAME */
	TRUE,	/* LINK */
	TRUE,	/* SYMLINK */
	TRUE,	/* MKDIR */
	TRUE,	/* RMDIR */
	TRUE,	/* READDIR */
	TRUE,	/* STATFS */
};
    123 int	nfs_sbwait();
    124 void	nfs_disconnect();
    125 struct mbuf *nfs_compress(), *nfs_uncompress();
    126 
    127 
    128 struct nfsreq nfsreqh;
    129 int nfsrexmtthresh = NFS_FISHY;
    130 int nfs_tcpnodelay = 1;
    131 
    132 /*
    133  * Initialize sockets and congestion for a new NFS connection.
    134  * We do not free the sockaddr if error.
    135  */
    136 nfs_connect(nmp)
    137 	register struct nfsmount *nmp;
    138 {
    139 	register struct socket *so;
    140 	struct sockaddr *saddr;					/* 08 Sep 92*/
    141 	int s, error, bufsize;
    142 	struct mbuf *m;
    143 	struct sockaddr_in *sin;				/* 08 Sep 92*/
    144 	u_short tport;						/* 08 Sep 92*/
    145 
    146 	nmp->nm_so = (struct socket *)0;
    147 	saddr = mtod(nmp->nm_nam, struct sockaddr *);		/* 08 Sep 92*/
    148 	if (error = socreate(saddr->sa_family,			/* 08 Sep 92*/
    149 		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
    150 		goto bad;
    151 	so = nmp->nm_so;
    152 	nmp->nm_soflags = so->so_proto->pr_flags;
    153 
    154 	/*
    155 	 * 08 Sep 92
    156 	 *
    157 	 * Some servers require that the client port be a reserved port number.
    158 	 */
    159 	if (saddr->sa_family == AF_INET) {
    160 		MGET(m, M_WAIT, MT_SONAME);
    161 		sin = mtod(m, struct sockaddr_in *);
    162 		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
    163 		sin->sin_family = AF_INET;
    164 		sin->sin_addr.s_addr = INADDR_ANY;
    165 		tport = IPPORT_RESERVED - 1;
    166 		sin->sin_port = htons(tport);
    167 		while (sobind(so, m) == EADDRINUSE &&
    168 		       --tport > IPPORT_RESERVED / 2)
    169 			sin->sin_port = htons(tport);
    170 		m_freem(m);
    171 	}
    172 
    173 	if (nmp->nm_sotype == SOCK_DGRAM)
    174 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
    175 		    NFS_MAXPACKET);
    176 	else
    177 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
    178 		    NFS_MAXPACKET + sizeof(u_long));
    179 	if (error = soreserve(so, bufsize, bufsize))
    180 		goto bad;
    181 
    182 	/*
    183 	 * Protocols that do not require connections may be optionally left
    184 	 * unconnected for servers that reply from a port other than NFS_PORT.
    185 	 */
    186 	if (nmp->nm_flag & NFSMNT_NOCONN) {
    187 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
    188 			error = ENOTCONN;
    189 			goto bad;
    190 		}
    191 	} else {
    192 		if (error = soconnect(so, nmp->nm_nam))
    193 			goto bad;
    194 
    195 		/*
    196 		 * Wait for the connection to complete. Cribbed from the
    197 		 * connect system call but with the wait at negative prio.
    198 		 */
    199 		s = splnet();
    200 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
    201 			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
    202 		splx(s);
    203 		if (so->so_error) {
    204 			error = so->so_error;
    205 			goto bad;
    206 		}
    207 	}
    208 	if (nmp->nm_sotype == SOCK_DGRAM) {
    209 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    210 			so->so_rcv.sb_timeo = (5 * hz);
    211 			so->so_snd.sb_timeo = (5 * hz);
    212 		} else {
    213 			so->so_rcv.sb_timeo = 0;
    214 			so->so_snd.sb_timeo = 0;
    215 		}
    216 		nmp->nm_rto = NFS_TIMEO;
    217 	} else {
    218 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    219 			so->so_rcv.sb_timeo = (5 * hz);
    220 			so->so_snd.sb_timeo = (5 * hz);
    221 		} else {
    222 			so->so_rcv.sb_timeo = 0;
    223 			so->so_snd.sb_timeo = 0;
    224 		}
    225 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    226 			MGET(m, M_WAIT, MT_SOOPTS);
    227 			*mtod(m, int *) = 1;
    228 			m->m_len = sizeof(int);
    229 			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
    230 		}
    231 		if (so->so_proto->pr_domain->dom_family == AF_INET &&
    232 		    so->so_proto->pr_protocol == IPPROTO_TCP &&
    233 		    nfs_tcpnodelay) {
    234 			MGET(m, M_WAIT, MT_SOOPTS);
    235 			*mtod(m, int *) = 1;
    236 			m->m_len = sizeof(int);
    237 			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
    238 		}
    239 		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
    240 	}
    241 	so->so_rcv.sb_flags |= SB_NOINTR;
    242 	so->so_snd.sb_flags |= SB_NOINTR;
    243 
    244 	/* Initialize other non-zero congestion variables */
    245 	nmp->nm_window = 2;			/* Initial send window */
    246 	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
    247 	nmp->nm_rttvar = nmp->nm_rto << 1;
    248 	nmp->nm_sent = 0;
    249 	nmp->nm_currexmit = 0;
    250 	return (0);
    251 
    252 bad:
    253 	nfs_disconnect(nmp);
    254 	return (error);
    255 }
    256 
    257 /*
    258  * Reconnect routine:
    259  * Called when a connection is broken on a reliable protocol.
    260  * - clean up the old socket
    261  * - nfs_connect() again
    262  * - set R_MUSTRESEND for all outstanding requests on mount point
    263  * If this fails the mount point is DEAD!
    264  * nb: Must be called with the nfs_solock() set on the mount point.
    265  */
    266 nfs_reconnect(rep, nmp)
    267 	register struct nfsreq *rep;
    268 	register struct nfsmount *nmp;
    269 {
    270 	register struct nfsreq *rp;
    271 	int error;
    272 
    273 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    274 	    "trying reconnect");
    275 	while (error = nfs_connect(nmp)) {
    276 #ifdef lint
    277 		error = error;
    278 #endif /* lint */
    279 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
    280 			return (EINTR);
    281 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
    282 	}
    283 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    284 	    "reconnected");
    285 
    286 	/*
    287 	 * Loop through outstanding request list and fix up all requests
    288 	 * on old socket.
    289 	 */
    290 	rp = nfsreqh.r_next;
    291 	while (rp != &nfsreqh) {
    292 		if (rp->r_nmp == nmp)
    293 			rp->r_flags |= R_MUSTRESEND;
    294 		rp = rp->r_next;
    295 	}
    296 	return (0);
    297 }
    298 
    299 /*
    300  * NFS disconnect. Clean up and unlink.
    301  */
    302 void
    303 nfs_disconnect(nmp)
    304 	register struct nfsmount *nmp;
    305 {
    306 	register struct socket *so;
    307 
    308 	if (nmp->nm_so) {
    309 		so = nmp->nm_so;
    310 		nmp->nm_so = (struct socket *)0;
    311 		soshutdown(so, 2);
    312 		soclose(so);
    313 	}
    314 }
    315 
    316 /*
    317  * This is the nfs send routine. For connection based socket types, it
    318  * must be called with an nfs_solock() on the socket.
    319  * "rep == NULL" indicates that it has been called from a server.
    320  */
    321 nfs_send(so, nam, top, rep)
    322 	register struct socket *so;
    323 	struct mbuf *nam;
    324 	register struct mbuf *top;
    325 	struct nfsreq *rep;
    326 {
    327 	struct mbuf *sendnam;
    328 	int error, soflags;
    329 
    330 	if (rep) {
    331 		if (rep->r_flags & R_SOFTTERM) {
    332 			m_freem(top);
    333 			return (EINTR);
    334 		}
    335 		if (rep->r_nmp->nm_so == NULL &&
    336 		    (error = nfs_reconnect(rep, rep->r_nmp)))
    337 			return (error);
    338 		rep->r_flags &= ~R_MUSTRESEND;
    339 		so = rep->r_nmp->nm_so;
    340 		soflags = rep->r_nmp->nm_soflags;
    341 	} else
    342 		soflags = so->so_proto->pr_flags;
    343 	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
    344 		sendnam = (struct mbuf *)0;
    345 	else
    346 		sendnam = nam;
    347 
    348 	error = sosend(so, sendnam, (struct uio *)0, top,
    349 		(struct mbuf *)0, 0);
    350 	if (error == EWOULDBLOCK && rep) {
    351 		if (rep->r_flags & R_SOFTTERM)
    352 			error = EINTR;
    353 		else {
    354 			rep->r_flags |= R_MUSTRESEND;
    355 			error = 0;
    356 		}
    357 	}
    358 	/*
    359 	 * Ignore socket errors??
    360 	 */
    361 	if (error && error != EINTR && error != ERESTART)
    362 		error = 0;
    363 	return (error);
    364 }
    365 
    366 /*
    367  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    368  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    369  * Mark and consolidate the data into a new mbuf list.
    370  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    371  *     small mbufs.
    372  * For SOCK_STREAM we must be very careful to read an entire record once
    373  * we have read any of it, even if the system call has been interrupted.
    374  */
    375 nfs_receive(so, aname, mp, rep)
    376 	register struct socket *so;
    377 	struct mbuf **aname;
    378 	struct mbuf **mp;
    379 	register struct nfsreq *rep;
    380 {
    381 	struct uio auio;
    382 	struct iovec aio;
    383 	register struct mbuf *m;
    384 	struct mbuf *m2, *mnew, **mbp;
    385 	caddr_t fcp, tcp;
    386 	u_long len;
    387 	struct mbuf **getnam;
    388 	int error, siz, mlen, soflags, rcvflg;
    389 
    390 	/*
    391 	 * Set up arguments for soreceive()
    392 	 */
    393 	*mp = (struct mbuf *)0;
    394 	*aname = (struct mbuf *)0;
    395 	if (rep)
    396 		soflags = rep->r_nmp->nm_soflags;
    397 	else
    398 		soflags = so->so_proto->pr_flags;
    399 
    400 	/*
    401 	 * For reliable protocols, lock against other senders/receivers
    402 	 * in case a reconnect is necessary.
    403 	 * For SOCK_STREAM, first get the Record Mark to find out how much
    404 	 * more there is to get.
    405 	 * We must lock the socket against other receivers
    406 	 * until we have an entire rpc request/reply.
    407 	 */
    408 	if (soflags & PR_CONNREQUIRED) {
    409 tryagain:
    410 		/*
    411 		 * Check for fatal errors and resending request.
    412 		 */
    413 		if (rep) {
    414 			/*
    415 			 * Ugh: If a reconnect attempt just happened, nm_so
    416 			 * would have changed. NULL indicates a failed
    417 			 * attempt that has essentially shut down this
    418 			 * mount point.
    419 			 */
    420 			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
    421 				(rep->r_flags & R_SOFTTERM))
    422 				return (EINTR);
    423 			while (rep->r_flags & R_MUSTRESEND) {
    424 				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
    425 				nfsstats.rpcretries++;
    426 				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
    427 					rep))
    428 					goto errout;
    429 			}
    430 		}
    431 		if ((soflags & PR_ATOMIC) == 0) {
    432 			aio.iov_base = (caddr_t) &len;
    433 			aio.iov_len = sizeof(u_long);
    434 			auio.uio_iov = &aio;
    435 			auio.uio_iovcnt = 1;
    436 			auio.uio_segflg = UIO_SYSSPACE;
    437 			auio.uio_rw = UIO_READ;
    438 			auio.uio_procp = (struct proc *)0;
    439 			auio.uio_offset = 0;
    440 			auio.uio_resid = sizeof(u_long);
    441 			do {
    442 			    rcvflg = MSG_WAITALL;
    443 			    error = soreceive(so, (struct mbuf **)0, &auio,
    444 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
    445 			    if (error == EWOULDBLOCK && rep) {
    446 				if (rep->r_flags & R_SOFTTERM)
    447 					return (EINTR);
    448 				if (rep->r_flags & R_MUSTRESEND)
    449 					goto tryagain;
    450 			    }
    451 			} while (error == EWOULDBLOCK);
    452 			if (!error && auio.uio_resid > 0) {
    453 			    if (rep)
    454 				log(LOG_INFO,
    455 				   "short receive (%d/%d) from nfs server %s\n",
    456 				   sizeof(u_long) - auio.uio_resid,
    457 				   sizeof(u_long),
    458 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    459 			    error = EPIPE;
    460 			}
    461 			if (error)
    462 				goto errout;
    463 			len = ntohl(len) & ~0x80000000;
    464 			/*
    465 			 * This is SERIOUS! We are out of sync with the sender
    466 			 * and forcing a disconnect/reconnect is all I can do.
    467 			 */
    468 			if (len > NFS_MAXPACKET) {
    469 			    if (rep)
    470 				log(LOG_ERR, "%s (%d) from nfs server %s\n",
    471 				    "impossible packet length",
    472 				    len,
    473 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    474 			    error = EFBIG;
    475 			    goto errout;
    476 			}
    477 			auio.uio_resid = len;
    478 			do {
    479 			    rcvflg = MSG_WAITALL;
    480 			    error =  soreceive(so, (struct mbuf **)0,
    481 				&auio, mp, (struct mbuf **)0, &rcvflg);
    482 			} while (error == EWOULDBLOCK || error == EINTR ||
    483 				 error == ERESTART);
    484 			if (!error && auio.uio_resid > 0) {
    485 			    if (rep)
    486 				log(LOG_INFO,
    487 				   "short receive (%d/%d) from nfs server %s\n",
    488 				   len - auio.uio_resid, len,
    489 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    490 			    error = EPIPE;
    491 			}
    492 		} else {
    493 			auio.uio_resid = len = 1000000;	/* Anything Big */
    494 			do {
    495 			    rcvflg = 0;
    496 			    error =  soreceive(so, (struct mbuf **)0,
    497 				&auio, mp, (struct mbuf **)0, &rcvflg);
    498 			    if (error == EWOULDBLOCK && rep) {
    499 				if (rep->r_flags & R_SOFTTERM)
    500 					return (EINTR);
    501 				if (rep->r_flags & R_MUSTRESEND)
    502 					goto tryagain;
    503 			    }
    504 			} while (error == EWOULDBLOCK);
    505 			if (!error && *mp == NULL)
    506 				error = EPIPE;
    507 			len -= auio.uio_resid;
    508 		}
    509 errout:
    510 		if (error && rep && error != EINTR && error != ERESTART) {
    511 			m_freem(*mp);
    512 			*mp = (struct mbuf *)0;
    513 			if (error != EPIPE && rep)
    514 				log(LOG_INFO,
    515 				    "receive error %d from nfs server %s\n",
    516 				    error,
    517 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    518 			nfs_disconnect(rep->r_nmp);
    519 			error = nfs_reconnect(rep, rep->r_nmp);
    520 			if (!error)
    521 				goto tryagain;
    522 		}
    523 	} else {
    524 		if (so->so_state & SS_ISCONNECTED)
    525 			getnam = (struct mbuf **)0;
    526 		else
    527 			getnam = aname;
    528 		auio.uio_resid = len = 1000000;
    529 		do {
    530 			rcvflg = 0;
    531 			error =  soreceive(so, getnam, &auio, mp,
    532 				(struct mbuf **)0, &rcvflg);
    533 			if (error == EWOULDBLOCK && rep &&
    534 			    (rep->r_flags & R_SOFTTERM))
    535 				return (EINTR);
    536 		} while (error == EWOULDBLOCK);
    537 		len -= auio.uio_resid;
    538 	}
    539 	if (error) {
    540 		m_freem(*mp);
    541 		*mp = (struct mbuf *)0;
    542 	}
    543 	/*
    544 	 * Search for any mbufs that are not a multiple of 4 bytes long.
    545 	 * These could cause pointer alignment problems, so copy them to
    546 	 * well aligned mbufs.
    547 	 */
    548 	m = *mp;
    549 	mbp = mp;
    550 	while (m) {
    551 		/*
    552 		 * All this for something that may never happen.
    553 		 */
    554 		if (m->m_next && (m->m_len & 0x3)) {
    555 			printf("nfs_rcv odd length!\n");
    556 			mlen = 0;
    557 			while (m) {
    558 				fcp = mtod(m, caddr_t);
    559 				while (m->m_len > 0) {
    560 					if (mlen == 0) {
    561 						MGET(m2, M_WAIT, MT_DATA);
    562 						if (len >= MINCLSIZE)
    563 							MCLGET(m2, M_WAIT);
    564 						m2->m_len = 0;
    565 						mlen = M_TRAILINGSPACE(m2);
    566 						tcp = mtod(m2, caddr_t);
    567 						*mbp = m2;
    568 						mbp = &m2->m_next;
    569 					}
    570 					siz = MIN(mlen, m->m_len);
    571 					bcopy(fcp, tcp, siz);
    572 					m2->m_len += siz;
    573 					mlen -= siz;
    574 					len -= siz;
    575 					tcp += siz;
    576 					m->m_len -= siz;
    577 					fcp += siz;
    578 				}
    579 				MFREE(m, mnew);
    580 				m = mnew;
    581 			}
    582 			break;
    583 		}
    584 		len -= m->m_len;
    585 		mbp = &m->m_next;
    586 		m = m->m_next;
    587 	}
    588 	return (error);
    589 }
    590 
    591 /*
    592  * Implement receipt of reply on a socket.
    593  * We must search through the list of received datagrams matching them
    594  * with outstanding requests using the xid, until ours is found.
    595  */
    596 /* ARGSUSED */
    597 nfs_reply(nmp, myrep)
    598 	struct nfsmount *nmp;
    599 	struct nfsreq *myrep;
    600 {
    601 	register struct mbuf *m;
    602 	register struct nfsreq *rep;
    603 	register int error = 0;
    604 	u_long rxid;
    605 	struct mbuf *mp, *nam;
    606 	char *cp;
    607 	int cnt, xfer;
    608 
    609 	/*
    610 	 * Loop around until we get our own reply
    611 	 */
    612 	for (;;) {
    613 		/*
    614 		 * Lock against other receivers so that I don't get stuck in
    615 		 * sbwait() after someone else has received my reply for me.
    616 		 * Also necessary for connection based protocols to avoid
    617 		 * race conditions during a reconnect.
    618 		 */
    619 		nfs_solock(&nmp->nm_flag);
    620 		/* Already received, bye bye */
    621 		if (myrep->r_mrep != NULL) {
    622 			nfs_sounlock(&nmp->nm_flag);
    623 			return (0);
    624 		}
    625 		/*
    626 		 * Get the next Rpc reply off the socket
    627 		 */
    628 		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
    629 			nfs_sounlock(&nmp->nm_flag);
    630 
    631 			/*
    632 			 * Ignore routing errors on connectionless protocols??
    633 			 */
    634 			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
    635 				nmp->nm_so->so_error = 0;
    636 				continue;
    637 			}
    638 
    639 			/*
    640 			 * Otherwise cleanup and return a fatal error.
    641 			 */
    642 			if (myrep->r_flags & R_TIMING) {
    643 				myrep->r_flags &= ~R_TIMING;
    644 				nmp->nm_rtt = -1;
    645 			}
    646 			if (myrep->r_flags & R_SENT) {
    647 				myrep->r_flags &= ~R_SENT;
    648 				nmp->nm_sent--;
    649 			}
    650 			return (error);
    651 		}
    652 
    653 		/*
    654 		 * Get the xid and check that it is an rpc reply
    655 		 */
    656 		m = mp;
    657 		while (m && m->m_len == 0)
    658 			m = m->m_next;
    659 		if (m == NULL) {
    660 			nfsstats.rpcinvalid++;
    661 			m_freem(mp);
    662 			nfs_sounlock(&nmp->nm_flag);
    663 			continue;
    664 		}
    665 		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
    666 		/*
    667 		 * Loop through the request list to match up the reply
    668 		 * Iff no match, just drop the datagram
    669 		 */
    670 		m = mp;
    671 		rep = nfsreqh.r_next;
    672 		while (rep != &nfsreqh) {
    673 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
    674 				/* Found it.. */
    675 				rep->r_mrep = m;
    676 				/*
    677 				 * Update timing
    678 				 */
    679 				if (rep->r_flags & R_TIMING) {
    680 					nfs_updatetimer(rep->r_nmp);
    681 					rep->r_flags &= ~R_TIMING;
    682 					rep->r_nmp->nm_rtt = -1;
    683 				}
    684 				if (rep->r_flags & R_SENT) {
    685 					rep->r_flags &= ~R_SENT;
    686 					rep->r_nmp->nm_sent--;
    687 				}
    688 				break;
    689 			}
    690 			rep = rep->r_next;
    691 		}
    692 		nfs_sounlock(&nmp->nm_flag);
    693 		if (nam)
    694 			m_freem(nam);
    695 		/*
    696 		 * If not matched to a request, drop it.
    697 		 * If it's mine, get out.
    698 		 */
    699 		if (rep == &nfsreqh) {
    700 			nfsstats.rpcunexpected++;
    701 			m_freem(m);
    702 		} else if (rep == myrep)
    703 			return (0);
    704 	}
    705 }
    706 
    707 /*
    708  * nfs_request - goes something like this
    709  *	- fill in request struct
    710  *	- links it into list
    711  *	- calls nfs_send() for first transmit
    712  *	- calls nfs_receive() to get reply
    713  *	- break down rpc header and return with nfs reply pointed to
    714  *	  by mrep or error
    715  * nb: always frees up mreq mbuf list
    716  */
    717 nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
    718 	struct vnode *vp;
    719 	struct mbuf *mreq;
    720 	u_long xid;
    721 	int procnum;
    722 	struct proc *procp;
    723 	int tryhard;
    724 	struct mount *mp;
    725 	struct mbuf **mrp;
    726 	struct mbuf **mdp;
    727 	caddr_t *dposp;
    728 {
    729 	register struct mbuf *m, *mrep;
    730 	register struct nfsreq *rep;
    731 	register u_long *tl;
    732 	register int len;
    733 	struct nfsmount *nmp;
    734 	struct mbuf *md;
    735 	struct nfsreq *reph;
    736 	caddr_t dpos;
    737 	char *cp2;
    738 	int t1;
    739 	int s, compressed;
    740 	int error = 0;
    741 
    742 	nmp = VFSTONFS(mp);
    743 	m = mreq;
    744 	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
    745 	rep->r_xid = xid;
    746 	rep->r_nmp = nmp;
    747 	rep->r_vp = vp;
    748 	rep->r_procp = procp;
    749 	if ((nmp->nm_flag & NFSMNT_SOFT) ||
    750 	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
    751 		rep->r_retry = nmp->nm_retry;
    752 	else
    753 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
    754 	rep->r_flags = rep->r_rexmit = 0;
    755 	/*
    756 	 * Three cases:
    757 	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
    758 	 * - idempotent requests on SOCK_DGRAM use 0
    759 	 * - Reliable transports, NFS_RELIABLETIMEO
    760 	 *   Timeouts are still done on reliable transports to ensure detection
    761 	 *   of excessive connection delay.
    762 	 */
    763 	if (nmp->nm_sotype != SOCK_DGRAM)
    764 		rep->r_timerinit = -NFS_RELIABLETIMEO;
    765 	else if (nonidempotent[procnum])
    766 		rep->r_timerinit = -NFS_MINIDEMTIMEO;
    767 	else
    768 		rep->r_timerinit = 0;
    769 	rep->r_timer = rep->r_timerinit;
    770 	rep->r_mrep = NULL;
    771 	len = 0;
    772 	while (m) {
    773 		len += m->m_len;
    774 		m = m->m_next;
    775 	}
    776 	mreq->m_pkthdr.len = len;
    777 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
    778 	compressed = 0;
    779 	m = mreq;
    780 	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
    781 		mreq = nfs_compress(mreq);
    782 		if (mreq != m) {
    783 			len = mreq->m_pkthdr.len;
    784 			compressed++;
    785 		}
    786 	}
    787 	/*
    788 	 * For non-atomic protocols, insert a Sun RPC Record Mark.
    789 	 */
    790 	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
    791 		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
    792 		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
    793 	}
    794 	rep->r_mreq = mreq;
    795 
    796 	/*
    797 	 * Do the client side RPC.
    798 	 */
    799 	nfsstats.rpcrequests++;
    800 	/*
    801 	 * Chain request into list of outstanding requests. Be sure
    802 	 * to put it LAST so timer finds oldest requests first.
    803 	 */
    804 	s = splnet();
    805 	reph = &nfsreqh;
    806 	reph->r_prev->r_next = rep;
    807 	rep->r_prev = reph->r_prev;
    808 	reph->r_prev = rep;
    809 	rep->r_next = reph;
    810 	/*
    811 	 * If backing off another request or avoiding congestion, don't
    812 	 * send this one now but let timer do it. If not timing a request,
    813 	 * do it now.
    814 	 */
    815 	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
    816 	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
    817 		nmp->nm_sent++;
    818 		rep->r_flags |= R_SENT;
    819 		if (nmp->nm_rtt == -1) {
    820 			nmp->nm_rtt = 0;
    821 			rep->r_flags |= R_TIMING;
    822 		}
    823 		splx(s);
    824 		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
    825 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    826 			nfs_solock(&nmp->nm_flag);
    827 		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
    828 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    829 			nfs_sounlock(&nmp->nm_flag);
    830 		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
    831 			nmp->nm_so->so_error = error = 0;
    832 	} else
    833 		splx(s);
    834 
    835 	/*
    836 	 * Wait for the reply from our send or the timer's.
    837 	 */
    838 	if (!error)
    839 		error = nfs_reply(nmp, rep);
    840 
    841 	/*
    842 	 * RPC done, unlink the request.
    843 	 */
    844 	s = splnet();
    845 	rep->r_prev->r_next = rep->r_next;
    846 	rep->r_next->r_prev = rep->r_prev;
    847 	splx(s);
    848 
    849 	/*
    850 	 * If there was a successful reply and a tprintf msg.
    851 	 * tprintf a response.
    852 	 */
    853 	if (!error && (rep->r_flags & R_TPRINTFMSG))
    854 		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    855 		    "is alive again");
    856 	m_freem(rep->r_mreq);
    857 	mrep = rep->r_mrep;
    858 	FREE((caddr_t)rep, M_NFSREQ);
    859 	if (error)
    860 		return (error);
    861 
    862 	if (compressed)
    863 		mrep = nfs_uncompress(mrep);
    864 	md = mrep;
    865 	/*
    866 	 * break down the rpc header and check if ok
    867 	 */
    868 	dpos = mtod(md, caddr_t);
    869 	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
    870 	tl += 2;
    871 	if (*tl++ == rpc_msgdenied) {
    872 		if (*tl == rpc_mismatch)
    873 			error = EOPNOTSUPP;
    874 		else
    875 			error = EACCES;
    876 		m_freem(mrep);
    877 		return (error);
    878 	}
    879 	/*
    880 	 * skip over the auth_verf, someday we may want to cache auth_short's
    881 	 * for nfs_reqhead(), but for now just dump it
    882 	 */
    883 	if (*++tl != 0) {
    884 		len = nfsm_rndup(fxdr_unsigned(long, *tl));
    885 		nfsm_adv(len);
    886 	}
    887 	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    888 	/* 0 == ok */
    889 	if (*tl == 0) {
    890 		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    891 		if (*tl != 0) {
    892 			error = fxdr_unsigned(int, *tl);
    893 			m_freem(mrep);
    894 			return (error);
    895 		}
    896 		*mrp = mrep;
    897 		*mdp = md;
    898 		*dposp = dpos;
    899 		return (0);
    900 	}
    901 	m_freem(mrep);
    902 	return (EPROTONOSUPPORT);
    903 nfsmout:
    904 	return (error);
    905 }
    906 
    907 /*
    908  * Get a request for the server main loop
    909  * - receive a request via. nfs_soreceive()
    910  * - verify it
    911  * - fill in the cred struct.
    912  */
nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
	msk, mtch, wascomp, repstat)				/* 08 Aug 92*/
	struct socket *so;
	u_long prog;
	u_long vers;
	int maxproc;
	struct mbuf **nam;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
	u_long *retxid;
	u_long *procnum;
	register struct ucred *cr;
	struct mbuf *msk, *mtch;
	int *wascomp, *repstat;					/* 08 Aug 92*/
{
	register int i;
	register u_long *tl;
	register long t1;
	caddr_t dpos, cp2;
	int error = 0;
	struct mbuf *mrep, *md;
	int len;

	*repstat = 0;						/* 08 Aug 92*/
	/*
	 * Receive one request.  On a connectionless socket, discard and
	 * re-receive until the sender's address passes the nfsd
	 * mask/match filter (nfs_badnam()).
	 */
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
	} else {
		mrep = (struct mbuf *)0;
		do {
			if (mrep) {
				m_freem(*nam);
				m_freem(mrep);
			}
			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
		} while (!error && nfs_badnam(*nam, msk, mtch));
	}
	if (error)
		return (error);
	/*
	 * Undo optional header compression (nfs_uncompress()) and record
	 * in *wascomp whether the request arrived compressed, so the
	 * caller can compress the reply to match.
	 */
	md = mrep;
	mrep = nfs_uncompress(mrep);
	if (mrep != md) {
		*wascomp = 1;
		md = mrep;
	} else
		*wascomp = 0;
	dpos = mtod(mrep, caddr_t);
	/*
	 * Pull apart the first ten words of the RPC call header: xid,
	 * direction, rpc version, program, version, procedure, auth
	 * flavor and auth length.  NB: the nfsm_* macros branch to
	 * nfsmout with error set if the mbuf data runs out.
	 */
	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
	*retxid = fxdr_unsigned(u_long, *tl++);
	/*
	 * Header sanity checks.  A bad direction/version/program is not a
	 * local error: return 0 with *repstat set so the caller can send
	 * the proper rejection reply (mrep ownership passes via *mrp).
	 */
	if (*tl++ != rpc_call || *tl++ != rpc_vers) {		/* 08 Aug 92*/
		*mrp = mrep;
		*procnum = NFSPROC_NOOP;
		*repstat = ERPCMISMATCH;
		return (0);
	}
	if (*tl++ != prog) {
		*mrp = mrep;					/* 08 Aug 92*/
		*procnum = NFSPROC_NOOP;
		*repstat = EPROGUNAVAIL;
		return (0);
	}
	if (*tl++ != vers) {
		*mrp = mrep;					/* 08 Aug 92*/
		*procnum = NFSPROC_NOOP;
		*repstat = EPROGMISMATCH;
		return (0);
	}
	*procnum = fxdr_unsigned(u_long, *tl++);
	/* The NULL procedure needs no credentials - hand it straight back. */
	if (*procnum == NFSPROC_NULL) {
		*mrp = mrep;
		return (0);
	}
	/* Only AUTH_UNIX credentials are accepted here. */
	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
		*mrp = mrep;					/* 08 Aug 92*/
		*procnum = NFSPROC_NOOP;
		*repstat = EPROCUNAVAIL;
		return (0);
	}
	/* Credential body length must be sane. */
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}
	/*
	 * The pre-increment skips one word (the AUTH_UNIX stamp); *tl is
	 * then the machine-name length.  The name itself is skipped below.
	 */
	len = fxdr_unsigned(int, *++tl);
	if (len < 0 || len > NFS_MAXNAMLEN) {
		m_freem(mrep);
		return (EBADRPC);
	}
	nfsm_adv(nfsm_rndup(len));
	/* uid, gid and the count of supplementary gids. */
	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
	len = fxdr_unsigned(int, *tl);
	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
		m_freem(mrep);
		return (EBADRPC);
	}
	/* len gid words plus the two verifier header words. */
	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
	/*
	 * Copy up to NGROUPS-1 gids into cr_groups[1..]; extras are
	 * silently dropped.  cr_groups[0] is not written here -
	 * NOTE(review): presumably filled in by the caller; confirm.
	 */
	for (i = 1; i <= len; i++)
		if (i < NGROUPS)
			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
		else
			tl++;
	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
	/*
	 * Do we have any use for the verifier?
	 * According to the "Remote Procedure Call Protocol Spec." it
	 * should be AUTH_NULL, but some clients make it AUTH_UNIX.
	 * For now, just skip over it.
	 */
	len = fxdr_unsigned(int, *++tl);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}
	if (len > 0)
		nfsm_adv(nfsm_rndup(len));
	/* Success: pass back the request chain and current parse position. */
	*mrp = mrep;
	*mdp = md;
	*dposp = dpos;
	return (0);
nfsmout:
	return (error);
}
   1037 
   1038 /*
   1039  * Generate the rpc reply header
   1040  * siz arg. is used to decide if adding a cluster is worthwhile
   1041  */
nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
	int siz;
	u_long retxid;
	int err;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	register u_long *tl;
	register long t1;
	caddr_t bpos;
	struct mbuf *mreq, *mb, *mb2;

	NFSMGETHDR(mreq);
	mb = mreq;
	/*
	 * If the reply header plus the caller's expected data won't fit
	 * in a plain header mbuf, attach a cluster up front.
	 */
	if ((siz+RPC_REPLYSIZ) > MHLEN)
		MCLGET(mreq, M_WAIT);
	tl = mtod(mreq, u_long *);
	mreq->m_len = 6*NFSX_UNSIGNED;	/* fixed part of the reply: 6 words */
	bpos = ((caddr_t)tl)+mreq->m_len;
	*tl++ = txdr_unsigned(retxid);
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH) {
		/* MSG_DENIED/RPC_MISMATCH: report supported rpc versions. */
		*tl++ = rpc_msgdenied;
		*tl++ = rpc_mismatch;
		*tl++ = txdr_unsigned(2);	/* low version */
		*tl = txdr_unsigned(2);		/* high version */
	} else {
		/* MSG_ACCEPTED with a null verifier (flavor 0, length 0). */
		*tl++ = rpc_msgaccepted;
		*tl++ = 0;
		*tl++ = 0;
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			/* Append the supported program version range. */
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(2);
			*tl = txdr_unsigned(2);	/* someday 3 */
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		default:
			/*
			 * Accept status SUCCESS.  Unless err is VNOVAL
			 * (caller wants no status word), append the nfs
			 * status as the first word of the results.
			 */
			*tl = 0;
			if (err != VNOVAL) {
				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(err);
			}
			break;
		};
	}
	/* Hand back the chain head, current mbuf and append position. */
	*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != VNOVAL)
		nfsstats.srvrpc_errs++;
	return (0);
}
   1102 
   1103 /*
   1104  * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out
   1106  * To avoid retransmission attempts on STREAM sockets (in the future) make
   1107  * sure to set the r_retry field to 0 (implies nm_retry == 0).
   1108  */
void
nfs_timer()
{
	register struct nfsreq *rep;
	register struct mbuf *m;
	register struct socket *so;
	register struct nfsmount *nmp;
	int s, error;

	s = splnet();
	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
		nmp = rep->r_nmp;
		/* Skip requests already answered, aborted, or unconnected. */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
		    (so = nmp->nm_so) == NULL)
			continue;
		/* On an interruptible mount, a pending signal aborts it. */
		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
			nmp->nm_rtt++;
		/* If not timed out */
		if (++rep->r_timer < nmp->nm_rto)
			continue;
		/* Do backoff and save new timeout in mount */
		if (rep->r_flags & R_TIMING) {
			nfs_backofftimer(nmp);
			rep->r_flags &= ~R_TIMING;
			nmp->nm_rtt = -1;
		}
		/* A timed-out request no longer occupies the send window. */
		if (rep->r_flags & R_SENT) {
			rep->r_flags &= ~R_SENT;
			nmp->nm_sent--;
		}

		/*
		 * Check for too many retries on soft mount.
		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
		 */
		if (++rep->r_rexmit > NFS_MAXREXMIT)
			rep->r_rexmit = NFS_MAXREXMIT;

		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		     rep->r_rexmit > NFS_FISHY) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		/* Only datagram sockets are retransmitted from here. */
		if (nmp->nm_sotype != SOCK_DGRAM)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * A connected socket (no NFSMNT_NOCONN) needs no address;
		 * otherwise the server address nm_nam is supplied.
		 */
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		       nmp->nm_sent < nmp->nm_window &&
		       (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			nfsstats.rpcretries++;
			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
			    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
			else
			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
			    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
			if (error) {
				/* Soft socket errors are cleared; retry later. */
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * We need to time the request even though we
				 * are retransmitting.
				 */
				nmp->nm_rtt = 0;
				nmp->nm_sent++;
				rep->r_flags |= (R_SENT|R_TIMING);
				rep->r_timer = rep->r_timerinit;
			}
		}
	}
	splx(s);
	/* Rearm ourselves: run again after hz/NFS_HZ clock ticks. */
	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
}
   1201 
   1202 /*
   1203  * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
   1204  * used here. The timer state is held in the nfsmount structure and
   1205  * a single request is used to clock the response. When successful
   1206  * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
   1207  * is done by nfs_backofftimer. We also log failure messages in these
   1208  * routines.
   1209  *
   1210  * Congestion variables are held in the nfshost structure which
   1211  * is referenced by nfsmounts and shared per-server. This separation
   1212  * makes it possible to do per-mount timing which allows varying disk
   1213  * access times to be dealt with, while preserving a network oriented
   1214  * congestion control scheme.
   1215  *
   1216  * The windowing implements the Jacobson/Karels slowstart algorithm
   1217  * with adjusted scaling factors. We start with one request, then send
   1218  * 4 more after each success until the ssthresh limit is reached, then
   1219  * we increment at a rate proportional to the window. On failure, we
   1220  * remember 3/4 the current window and clamp the send limit to 1. Note
   1221  * ICMP source quench is not reflected in so->so_error so we ignore that
   1222  * for now.
   1223  *
   1224  * NFS behaves much more like a transport protocol with these changes,
   1225  * shedding the teenage pedal-to-the-metal tendencies of "other"
   1226  * implementations.
   1227  *
   1228  * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
   1229  */
   1230 
   1231 /*
   1232  * The TCP algorithm was not forgiving enough. Because the NFS server
   1233  * responds only after performing lookups/diskio/etc, we have to be
   1234  * more prepared to accept a spiky variance. The TCP algorithm is:
   1235  * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
   1236  */
   1237 #define NFS_RTO(nmp)	(((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
   1238 
   1239 nfs_updatetimer(nmp)
   1240 	register struct nfsmount *nmp;
   1241 {
   1242 
   1243 	/* If retransmitted, clear and return */
   1244 	if (nmp->nm_rexmit || nmp->nm_currexmit) {
   1245 		nmp->nm_rexmit = nmp->nm_currexmit = 0;
   1246 		return;
   1247 	}
   1248 	/* If have a measurement, do smoothing */
   1249 	if (nmp->nm_srtt) {
   1250 		register short delta;
   1251 		delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
   1252 		if ((nmp->nm_srtt += delta) <= 0)
   1253 			nmp->nm_srtt = 1;
   1254 		if (delta < 0)
   1255 			delta = -delta;
   1256 		delta -= (nmp->nm_rttvar >> 2);
   1257 		if ((nmp->nm_rttvar += delta) <= 0)
   1258 			nmp->nm_rttvar = 1;
   1259 	/* Else initialize */
   1260 	} else {
   1261 		nmp->nm_rttvar = nmp->nm_rtt << 1;
   1262 		if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
   1263 		nmp->nm_srtt = nmp->nm_rttvar << 2;
   1264 	}
   1265 	/* Compute new Retransmission TimeOut and clip */
   1266 	nmp->nm_rto = NFS_RTO(nmp);
   1267 	if (nmp->nm_rto < NFS_MINTIMEO)
   1268 		nmp->nm_rto = NFS_MINTIMEO;
   1269 	else if (nmp->nm_rto > NFS_MAXTIMEO)
   1270 		nmp->nm_rto = NFS_MAXTIMEO;
   1271 
   1272 	/* Update window estimate */
   1273 	if (nmp->nm_window < nmp->nm_ssthresh)	/* quickly */
   1274 		nmp->nm_window += 4;
   1275 	else {						/* slowly */
   1276 		register long incr = ++nmp->nm_winext;
   1277 		incr = (incr * incr) / nmp->nm_window;
   1278 		if (incr > 0) {
   1279 			nmp->nm_winext = 0;
   1280 			++nmp->nm_window;
   1281 		}
   1282 	}
   1283 	if (nmp->nm_window > NFS_MAXWINDOW)
   1284 		nmp->nm_window = NFS_MAXWINDOW;
   1285 }
   1286 
   1287 nfs_backofftimer(nmp)
   1288 	register struct nfsmount *nmp;
   1289 {
   1290 	register unsigned long newrto;
   1291 
   1292 	/* Clip shift count */
   1293 	if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
   1294 		nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
   1295 	/* Back off RTO exponentially */
   1296 	newrto = NFS_RTO(nmp);
   1297 	newrto <<= (nmp->nm_rexmit - 1);
   1298 	if (newrto == 0 || newrto > NFS_MAXTIMEO)
   1299 		newrto = NFS_MAXTIMEO;
   1300 	nmp->nm_rto = newrto;
   1301 
   1302 	/* If too many retries, message, assume a bogus RTT and re-measure */
   1303 	if (nmp->nm_currexmit < nmp->nm_rexmit) {
   1304 		nmp->nm_currexmit = nmp->nm_rexmit;
   1305 		if (nmp->nm_currexmit >= nfsrexmtthresh) {
   1306 			if (nmp->nm_currexmit == nfsrexmtthresh) {
   1307 				nmp->nm_rttvar += (nmp->nm_srtt >> 2);
   1308 				nmp->nm_srtt = 0;
   1309 			}
   1310 		}
   1311 	}
   1312 	/* Close down window but remember this point (3/4 current) for later */
   1313 	nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
   1314 	nmp->nm_window = 1;
   1315 	nmp->nm_winext = 0;
   1316 }
   1317 
   1318 /*
   1319  * Test for a termination signal pending on procp.
   1320  * This is used for NFSMNT_INT mounts.
   1321  */
   1322 nfs_sigintr(p)
   1323 	register struct proc *p;
   1324 {
   1325 	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
   1326 	    NFSINT_SIGMASK))
   1327 		return (1);
   1328 	else
   1329 		return (0);
   1330 }
   1331 
   1332 nfs_msg(p, server, msg)
   1333 	struct proc *p;
   1334 	char *server, *msg;
   1335 {
   1336 	tpr_t tpr;
   1337 
   1338 	if (p)
   1339 		tpr = tprintf_open(p);
   1340 	else
   1341 		tpr = NULL;
   1342 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1343 	tprintf_close(tpr);
   1344 }
   1345 
   1346 /*
   1347  * Lock a socket against others.
   1348  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1349  * and also to avoid race conditions between the processes with nfs requests
   1350  * in progress when a reconnect is necessary.
   1351  */
   1352 nfs_solock(flagp)
   1353 	register int *flagp;
   1354 {
   1355 
   1356 	while (*flagp & NFSMNT_SCKLOCK) {
   1357 		*flagp |= NFSMNT_WANTSCK;
   1358 		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
   1359 	}
   1360 	*flagp |= NFSMNT_SCKLOCK;
   1361 }
   1362 
   1363 /*
   1364  * Unlock the stream socket for others.
   1365  */
   1366 nfs_sounlock(flagp)
   1367 	register int *flagp;
   1368 {
   1369 
   1370 	if ((*flagp & NFSMNT_SCKLOCK) == 0)
   1371 		panic("nfs sounlock");
   1372 	*flagp &= ~NFSMNT_SCKLOCK;
   1373 	if (*flagp & NFSMNT_WANTSCK) {
   1374 		*flagp &= ~NFSMNT_WANTSCK;
   1375 		wakeup((caddr_t)flagp);
   1376 	}
   1377 }
   1378 
   1379 /*
   1380  * This function compares two net addresses by family and returns TRUE
   1381  * if they are the same.
   1382  * If there is any doubt, return FALSE.
   1383  */
   1384 nfs_netaddr_match(nam1, nam2)
   1385 	struct mbuf *nam1, *nam2;
   1386 {
   1387 	register struct sockaddr *saddr1, *saddr2;
   1388 
   1389 	saddr1 = mtod(nam1, struct sockaddr *);
   1390 	saddr2 = mtod(nam2, struct sockaddr *);
   1391 	if (saddr1->sa_family != saddr2->sa_family)
   1392 		return (0);
   1393 
   1394 	/*
   1395 	 * Must do each address family separately since unused fields
   1396 	 * are undefined values and not always zeroed.
   1397 	 */
   1398 	switch (saddr1->sa_family) {
   1399 	case AF_INET:
   1400 		if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
   1401 		    ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
   1402 			return (1);
   1403 		break;
   1404 	default:
   1405 		break;
   1406 	};
   1407 	return (0);
   1408 }
   1409 
   1410 /*
   1411  * Check the hostname fields for nfsd's mask and match fields.
   1412  * By address family:
   1413  * - Bitwise AND the mask with the host address field
   1414  * - Compare for == with match
   1415  * return TRUE if not equal
   1416  */
   1417 nfs_badnam(nam, msk, mtch)
   1418 	register struct mbuf *nam, *msk, *mtch;
   1419 {
   1420 	switch (mtod(nam, struct sockaddr *)->sa_family) {
   1421 	case AF_INET:
   1422 		return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
   1423 			 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
   1424 			 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
   1425 	default:
   1426 		printf("nfs_badmatch, unknown sa_family\n");
   1427 		return (0);
   1428 	};
   1429 }
   1430