nfs_socket.c revision 1.3
      1 /*
      2  * Copyright (c) 1989, 1991 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Rick Macklem at The University of Guelph.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	@(#)nfs_socket.c	7.23 (Berkeley) 4/20/91
     37  *
     38  * PATCHES MAGIC                LEVEL   PATCH THAT GOT US HERE
     39  * --------------------         -----   ----------------------
     40  * CURRENT PATCH LEVEL:         1       00053
     41  * --------------------         -----   ----------------------
     42  *
     43  * 08 Sep 92    Rick "gopher I"         Fix "reserved port" bug, fixed for
     44  *						AIX3.2 NFS clients
     45  */
     46 
     47 /*
     48  * Socket operations for use by nfs
     49  */
     50 
     51 #include "param.h"
     52 #include "proc.h"
     53 #include "mount.h"
     54 #include "kernel.h"
     55 #include "malloc.h"
     56 #include "mbuf.h"
     57 #include "namei.h"
     58 #include "vnode.h"
     59 #include "domain.h"
     60 #include "protosw.h"
     61 #include "socket.h"
     62 #include "socketvar.h"
     63 #include "syslog.h"
     64 #include "tprintf.h"
     65 #include "../netinet/in.h"
     66 #include "../netinet/tcp.h"
     67 
     68 #include "rpcv2.h"
     69 #include "nfsv2.h"
     70 #include "nfs.h"
     71 #include "xdr_subs.h"
     72 #include "nfsm_subs.h"
     73 #include "nfsmount.h"
     74 
     75 #define	TRUE	1
     76 #define	FALSE	0
     77 
     78 /*
     79  * External data, mostly RPC constants in XDR form
     80  */
     81 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
     82 	rpc_msgaccepted, rpc_call;
     83 extern u_long nfs_prog, nfs_vers;
     84 /* Maybe these should be bits in a u_long ?? */
     85 /*
     86  * Static array that defines which nfs rpc's are nonidempotent
     87  */
     88 int nonidempotent[NFS_NPROCS] = {
     89 	FALSE,
     90 	FALSE,
     91 	TRUE,
     92 	FALSE,
     93 	FALSE,
     94 	FALSE,
     95 	FALSE,
     96 	FALSE,
     97 	TRUE,
     98 	TRUE,
     99 	TRUE,
    100 	TRUE,
    101 	TRUE,
    102 	TRUE,
    103 	TRUE,
    104 	TRUE,
    105 	FALSE,
    106 	FALSE,
    107 };
    108 static int compressrequest[NFS_NPROCS] = {
    109 	FALSE,
    110 	TRUE,
    111 	TRUE,
    112 	FALSE,
    113 	TRUE,
    114 	TRUE,
    115 	TRUE,
    116 	FALSE,
    117 	FALSE,
    118 	TRUE,
    119 	TRUE,
    120 	TRUE,
    121 	TRUE,
    122 	TRUE,
    123 	TRUE,
    124 	TRUE,
    125 	TRUE,
    126 	TRUE,
    127 };
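/*
 * For reference: index i in the two arrays above corresponds to NFSv2
 * procedure number i (see nfsv2.h): 0 null, 1 getattr, 2 setattr, 3 root,
 * 4 lookup, 5 readlink, 6 read, 7 writecache, 8 write, 9 create,
 * 10 remove, 11 rename, 12 link, 13 symlink, 14 mkdir, 15 rmdir,
 * 16 readdir, 17 statfs.  So nonidempotent[] marks setattr and the
 * write/create/remove/rename/link/symlink/mkdir/rmdir operations, and
 * compressrequest[] skips null, root, writecache and write.
 */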
    128 int	nfs_sbwait();
    129 void	nfs_disconnect();
    130 struct mbuf *nfs_compress(), *nfs_uncompress();
    131 
    132 
    133 struct nfsreq nfsreqh;
    134 int nfsrexmtthresh = NFS_FISHY;
    135 int nfs_tcpnodelay = 1;
    136 
    137 /*
    138  * Initialize sockets and congestion for a new NFS connection.
     139  * We do not free the sockaddr if an error occurs.
    140  */
    141 nfs_connect(nmp)
    142 	register struct nfsmount *nmp;
    143 {
    144 	register struct socket *so;
    145 	struct sockaddr *saddr;					/* 08 Sep 92*/
    146 	int s, error, bufsize;
    147 	struct mbuf *m;
    148 	struct sockaddr_in *sin;				/* 08 Sep 92*/
    149 	u_short tport;						/* 08 Sep 92*/
    150 
    151 	nmp->nm_so = (struct socket *)0;
    152 	saddr = mtod(nmp->nm_nam, struct sockaddr *);		/* 08 Sep 92*/
    153 	if (error = socreate(saddr->sa_family,			/* 08 Sep 92*/
    154 		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
    155 		goto bad;
    156 	so = nmp->nm_so;
    157 	nmp->nm_soflags = so->so_proto->pr_flags;
    158 
    159 	/*
    160 	 * 08 Sep 92
    161 	 *
    162 	 * Some servers require that the client port be a reserved port number.
    163 	 */
    164 	if (saddr->sa_family == AF_INET) {
    165 		MGET(m, M_WAIT, MT_SONAME);
    166 		sin = mtod(m, struct sockaddr_in *);
    167 		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
    168 		sin->sin_family = AF_INET;
    169 		sin->sin_addr.s_addr = INADDR_ANY;
    170 		tport = IPPORT_RESERVED - 1;
    171 		sin->sin_port = htons(tport);
    172 		while (sobind(so, m) == EADDRINUSE &&
    173 		       --tport > IPPORT_RESERVED / 2)
    174 			sin->sin_port = htons(tport);
    175 		m_freem(m);
    176 	}
    177 
    178 	if (nmp->nm_sotype == SOCK_DGRAM)
    179 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
    180 		    NFS_MAXPACKET);
    181 	else
    182 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
    183 		    NFS_MAXPACKET + sizeof(u_long));
    184 	if (error = soreserve(so, bufsize, bufsize))
    185 		goto bad;
    186 
    187 	/*
    188 	 * Protocols that do not require connections may be optionally left
    189 	 * unconnected for servers that reply from a port other than NFS_PORT.
    190 	 */
    191 	if (nmp->nm_flag & NFSMNT_NOCONN) {
    192 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
    193 			error = ENOTCONN;
    194 			goto bad;
    195 		}
    196 	} else {
    197 		if (error = soconnect(so, nmp->nm_nam))
    198 			goto bad;
    199 
    200 		/*
    201 		 * Wait for the connection to complete. Cribbed from the
    202 		 * connect system call but with the wait at negative prio.
    203 		 */
    204 		s = splnet();
    205 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
    206 			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
    207 		splx(s);
    208 		if (so->so_error) {
    209 			error = so->so_error;
    210 			goto bad;
    211 		}
    212 	}
    213 	if (nmp->nm_sotype == SOCK_DGRAM) {
    214 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    215 			so->so_rcv.sb_timeo = (5 * hz);
    216 			so->so_snd.sb_timeo = (5 * hz);
    217 		} else {
    218 			so->so_rcv.sb_timeo = 0;
    219 			so->so_snd.sb_timeo = 0;
    220 		}
    221 		nmp->nm_rto = NFS_TIMEO;
    222 	} else {
    223 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    224 			so->so_rcv.sb_timeo = (5 * hz);
    225 			so->so_snd.sb_timeo = (5 * hz);
    226 		} else {
    227 			so->so_rcv.sb_timeo = 0;
    228 			so->so_snd.sb_timeo = 0;
    229 		}
    230 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    231 			MGET(m, M_WAIT, MT_SOOPTS);
    232 			*mtod(m, int *) = 1;
    233 			m->m_len = sizeof(int);
    234 			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
    235 		}
    236 		if (so->so_proto->pr_domain->dom_family == AF_INET &&
    237 		    so->so_proto->pr_protocol == IPPROTO_TCP &&
    238 		    nfs_tcpnodelay) {
    239 			MGET(m, M_WAIT, MT_SOOPTS);
    240 			*mtod(m, int *) = 1;
    241 			m->m_len = sizeof(int);
    242 			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
    243 		}
    244 		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
    245 	}
    246 	so->so_rcv.sb_flags |= SB_NOINTR;
    247 	so->so_snd.sb_flags |= SB_NOINTR;
    248 
    249 	/* Initialize other non-zero congestion variables */
    250 	nmp->nm_window = 2;			/* Initial send window */
    251 	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
    252 	nmp->nm_rttvar = nmp->nm_rto << 1;
    253 	nmp->nm_sent = 0;
    254 	nmp->nm_currexmit = 0;
    255 	return (0);
    256 
    257 bad:
    258 	nfs_disconnect(nmp);
    259 	return (error);
    260 }
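/*
 * For illustration only: a minimal user-level sketch of the same
 * reserved-port dance done above with sobind(), written against the plain
 * socket system calls.  Ports are tried downward from IPPORT_RESERVED - 1
 * until bind() stops failing with EADDRINUSE.  Kept under #ifdef notdef
 * so it is never compiled into the kernel.
 */
#ifdef notdef
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>

static int
bind_reserved_port(sock)
	int sock;
{
	struct sockaddr_in sin;
	u_short port;

	bzero((char *)&sin, sizeof (sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = INADDR_ANY;
	for (port = IPPORT_RESERVED - 1; port > IPPORT_RESERVED / 2; port--) {
		sin.sin_port = htons(port);
		if (bind(sock, (struct sockaddr *)&sin, sizeof (sin)) == 0)
			return (0);		/* got a reserved port */
		if (errno != EADDRINUSE)
			break;			/* unexpected failure */
	}
	return (-1);
}
#endif /* notdef */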
    261 
    262 /*
    263  * Reconnect routine:
    264  * Called when a connection is broken on a reliable protocol.
    265  * - clean up the old socket
    266  * - nfs_connect() again
    267  * - set R_MUSTRESEND for all outstanding requests on mount point
    268  * If this fails the mount point is DEAD!
    269  * nb: Must be called with the nfs_solock() set on the mount point.
    270  */
    271 nfs_reconnect(rep, nmp)
    272 	register struct nfsreq *rep;
    273 	register struct nfsmount *nmp;
    274 {
    275 	register struct nfsreq *rp;
    276 	int error;
    277 
    278 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    279 	    "trying reconnect");
    280 	while (error = nfs_connect(nmp)) {
    281 #ifdef lint
    282 		error = error;
    283 #endif /* lint */
    284 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
    285 			return (EINTR);
    286 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
    287 	}
    288 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    289 	    "reconnected");
    290 
    291 	/*
    292 	 * Loop through outstanding request list and fix up all requests
    293 	 * on old socket.
    294 	 */
    295 	rp = nfsreqh.r_next;
    296 	while (rp != &nfsreqh) {
    297 		if (rp->r_nmp == nmp)
    298 			rp->r_flags |= R_MUSTRESEND;
    299 		rp = rp->r_next;
    300 	}
    301 	return (0);
    302 }
    303 
    304 /*
    305  * NFS disconnect. Clean up and unlink.
    306  */
    307 void
    308 nfs_disconnect(nmp)
    309 	register struct nfsmount *nmp;
    310 {
    311 	register struct socket *so;
    312 
    313 	if (nmp->nm_so) {
    314 		so = nmp->nm_so;
    315 		nmp->nm_so = (struct socket *)0;
    316 		soshutdown(so, 2);
    317 		soclose(so);
    318 	}
    319 }
    320 
    321 /*
    322  * This is the nfs send routine. For connection based socket types, it
    323  * must be called with an nfs_solock() on the socket.
    324  * "rep == NULL" indicates that it has been called from a server.
    325  */
    326 nfs_send(so, nam, top, rep)
    327 	register struct socket *so;
    328 	struct mbuf *nam;
    329 	register struct mbuf *top;
    330 	struct nfsreq *rep;
    331 {
    332 	struct mbuf *sendnam;
    333 	int error, soflags;
    334 
    335 	if (rep) {
    336 		if (rep->r_flags & R_SOFTTERM) {
    337 			m_freem(top);
    338 			return (EINTR);
    339 		}
    340 		if (rep->r_nmp->nm_so == NULL &&
    341 		    (error = nfs_reconnect(rep, rep->r_nmp)))
    342 			return (error);
    343 		rep->r_flags &= ~R_MUSTRESEND;
    344 		so = rep->r_nmp->nm_so;
    345 		soflags = rep->r_nmp->nm_soflags;
    346 	} else
    347 		soflags = so->so_proto->pr_flags;
    348 	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
    349 		sendnam = (struct mbuf *)0;
    350 	else
    351 		sendnam = nam;
    352 
    353 	error = sosend(so, sendnam, (struct uio *)0, top,
    354 		(struct mbuf *)0, 0);
    355 	if (error == EWOULDBLOCK && rep) {
    356 		if (rep->r_flags & R_SOFTTERM)
    357 			error = EINTR;
    358 		else {
    359 			rep->r_flags |= R_MUSTRESEND;
    360 			error = 0;
    361 		}
    362 	}
    363 	/*
    364 	 * Ignore socket errors??
    365 	 */
    366 	if (error && error != EINTR && error != ERESTART)
    367 		error = 0;
    368 	return (error);
    369 }
    370 
    371 /*
    372  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    373  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    374  * Mark and consolidate the data into a new mbuf list.
    375  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    376  *     small mbufs.
    377  * For SOCK_STREAM we must be very careful to read an entire record once
    378  * we have read any of it, even if the system call has been interrupted.
    379  */
    380 nfs_receive(so, aname, mp, rep)
    381 	register struct socket *so;
    382 	struct mbuf **aname;
    383 	struct mbuf **mp;
    384 	register struct nfsreq *rep;
    385 {
    386 	struct uio auio;
    387 	struct iovec aio;
    388 	register struct mbuf *m;
    389 	struct mbuf *m2, *mnew, **mbp;
    390 	caddr_t fcp, tcp;
    391 	u_long len;
    392 	struct mbuf **getnam;
    393 	int error, siz, mlen, soflags, rcvflg;
    394 
    395 	/*
    396 	 * Set up arguments for soreceive()
    397 	 */
    398 	*mp = (struct mbuf *)0;
    399 	*aname = (struct mbuf *)0;
    400 	if (rep)
    401 		soflags = rep->r_nmp->nm_soflags;
    402 	else
    403 		soflags = so->so_proto->pr_flags;
    404 
    405 	/*
    406 	 * For reliable protocols, lock against other senders/receivers
    407 	 * in case a reconnect is necessary.
    408 	 * For SOCK_STREAM, first get the Record Mark to find out how much
    409 	 * more there is to get.
    410 	 * We must lock the socket against other receivers
    411 	 * until we have an entire rpc request/reply.
    412 	 */
    413 	if (soflags & PR_CONNREQUIRED) {
    414 tryagain:
    415 		/*
    416 		 * Check for fatal errors and resending request.
    417 		 */
    418 		if (rep) {
    419 			/*
    420 			 * Ugh: If a reconnect attempt just happened, nm_so
    421 			 * would have changed. NULL indicates a failed
    422 			 * attempt that has essentially shut down this
    423 			 * mount point.
    424 			 */
    425 			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
    426 				(rep->r_flags & R_SOFTTERM))
    427 				return (EINTR);
    428 			while (rep->r_flags & R_MUSTRESEND) {
    429 				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
    430 				nfsstats.rpcretries++;
    431 				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
    432 					rep))
    433 					goto errout;
    434 			}
    435 		}
    436 		if ((soflags & PR_ATOMIC) == 0) {
    437 			aio.iov_base = (caddr_t) &len;
    438 			aio.iov_len = sizeof(u_long);
    439 			auio.uio_iov = &aio;
    440 			auio.uio_iovcnt = 1;
    441 			auio.uio_segflg = UIO_SYSSPACE;
    442 			auio.uio_rw = UIO_READ;
    443 			auio.uio_procp = (struct proc *)0;
    444 			auio.uio_offset = 0;
    445 			auio.uio_resid = sizeof(u_long);
    446 			do {
    447 			    rcvflg = MSG_WAITALL;
    448 			    error = soreceive(so, (struct mbuf **)0, &auio,
    449 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
    450 			    if (error == EWOULDBLOCK && rep) {
    451 				if (rep->r_flags & R_SOFTTERM)
    452 					return (EINTR);
    453 				if (rep->r_flags & R_MUSTRESEND)
    454 					goto tryagain;
    455 			    }
    456 			} while (error == EWOULDBLOCK);
    457 			if (!error && auio.uio_resid > 0) {
    458 			    if (rep)
    459 				log(LOG_INFO,
    460 				   "short receive (%d/%d) from nfs server %s\n",
    461 				   sizeof(u_long) - auio.uio_resid,
    462 				   sizeof(u_long),
    463 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    464 			    error = EPIPE;
    465 			}
    466 			if (error)
    467 				goto errout;
    468 			len = ntohl(len) & ~0x80000000;
    469 			/*
    470 			 * This is SERIOUS! We are out of sync with the sender
    471 			 * and forcing a disconnect/reconnect is all I can do.
    472 			 */
    473 			if (len > NFS_MAXPACKET) {
    474 			    if (rep)
    475 				log(LOG_ERR, "%s (%d) from nfs server %s\n",
    476 				    "impossible packet length",
    477 				    len,
    478 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    479 			    error = EFBIG;
    480 			    goto errout;
    481 			}
    482 			auio.uio_resid = len;
    483 			do {
    484 			    rcvflg = MSG_WAITALL;
    485 			    error =  soreceive(so, (struct mbuf **)0,
    486 				&auio, mp, (struct mbuf **)0, &rcvflg);
    487 			} while (error == EWOULDBLOCK || error == EINTR ||
    488 				 error == ERESTART);
    489 			if (!error && auio.uio_resid > 0) {
    490 			    if (rep)
    491 				log(LOG_INFO,
    492 				   "short receive (%d/%d) from nfs server %s\n",
    493 				   len - auio.uio_resid, len,
    494 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    495 			    error = EPIPE;
    496 			}
    497 		} else {
    498 			auio.uio_resid = len = 1000000;	/* Anything Big */
    499 			do {
    500 			    rcvflg = 0;
    501 			    error =  soreceive(so, (struct mbuf **)0,
    502 				&auio, mp, (struct mbuf **)0, &rcvflg);
    503 			    if (error == EWOULDBLOCK && rep) {
    504 				if (rep->r_flags & R_SOFTTERM)
    505 					return (EINTR);
    506 				if (rep->r_flags & R_MUSTRESEND)
    507 					goto tryagain;
    508 			    }
    509 			} while (error == EWOULDBLOCK);
    510 			if (!error && *mp == NULL)
    511 				error = EPIPE;
    512 			len -= auio.uio_resid;
    513 		}
    514 errout:
    515 		if (error && rep && error != EINTR && error != ERESTART) {
    516 			m_freem(*mp);
    517 			*mp = (struct mbuf *)0;
    518 			if (error != EPIPE && rep)
    519 				log(LOG_INFO,
    520 				    "receive error %d from nfs server %s\n",
    521 				    error,
    522 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    523 			nfs_disconnect(rep->r_nmp);
    524 			error = nfs_reconnect(rep, rep->r_nmp);
    525 			if (!error)
    526 				goto tryagain;
    527 		}
    528 	} else {
    529 		if (so->so_state & SS_ISCONNECTED)
    530 			getnam = (struct mbuf **)0;
    531 		else
    532 			getnam = aname;
    533 		auio.uio_resid = len = 1000000;
    534 		do {
    535 			rcvflg = 0;
    536 			error =  soreceive(so, getnam, &auio, mp,
    537 				(struct mbuf **)0, &rcvflg);
    538 			if (error == EWOULDBLOCK && rep &&
    539 			    (rep->r_flags & R_SOFTTERM))
    540 				return (EINTR);
    541 		} while (error == EWOULDBLOCK);
    542 		len -= auio.uio_resid;
    543 	}
    544 	if (error) {
    545 		m_freem(*mp);
    546 		*mp = (struct mbuf *)0;
    547 	}
    548 	/*
    549 	 * Search for any mbufs that are not a multiple of 4 bytes long.
    550 	 * These could cause pointer alignment problems, so copy them to
    551 	 * well aligned mbufs.
    552 	 */
    553 	m = *mp;
    554 	mbp = mp;
    555 	while (m) {
    556 		/*
    557 		 * All this for something that may never happen.
    558 		 */
    559 		if (m->m_next && (m->m_len & 0x3)) {
    560 			printf("nfs_rcv odd length!\n");
    561 			mlen = 0;
    562 			while (m) {
    563 				fcp = mtod(m, caddr_t);
    564 				while (m->m_len > 0) {
    565 					if (mlen == 0) {
    566 						MGET(m2, M_WAIT, MT_DATA);
    567 						if (len >= MINCLSIZE)
    568 							MCLGET(m2, M_WAIT);
    569 						m2->m_len = 0;
    570 						mlen = M_TRAILINGSPACE(m2);
    571 						tcp = mtod(m2, caddr_t);
    572 						*mbp = m2;
    573 						mbp = &m2->m_next;
    574 					}
    575 					siz = MIN(mlen, m->m_len);
    576 					bcopy(fcp, tcp, siz);
    577 					m2->m_len += siz;
    578 					mlen -= siz;
    579 					len -= siz;
    580 					tcp += siz;
    581 					m->m_len -= siz;
    582 					fcp += siz;
    583 				}
    584 				MFREE(m, mnew);
    585 				m = mnew;
    586 			}
    587 			break;
    588 		}
    589 		len -= m->m_len;
    590 		mbp = &m->m_next;
    591 		m = m->m_next;
    592 	}
    593 	return (error);
    594 }
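/*
 * For illustration only: a minimal user-level sketch of the SOCK_STREAM
 * Record Mark handling done above with soreceive()/MSG_WAITALL, written
 * in terms of read(2).  Kept under #ifdef notdef so it is never compiled
 * into the kernel.
 */
#ifdef notdef
#include <sys/types.h>
#include <netinet/in.h>
#include <unistd.h>

/* Read exactly cnt bytes, the user-level analogue of MSG_WAITALL above. */
static int
readn(sock, buf, cnt)
	int sock;
	char *buf;
	long cnt;
{
	long got, n;

	for (got = 0; got < cnt; got += n)
		if ((n = read(sock, buf + got, cnt - got)) <= 0)
			return (-1);
	return (0);
}

/*
 * Read one Sun RPC record fragment: a 4 byte big-endian record mark whose
 * high bit flags the last fragment and whose low 31 bits give the fragment
 * length, followed by that many bytes of data.
 */
static long
read_rpc_fragment(sock, buf, bufsiz, lastfrag)
	int sock;
	char *buf;
	long bufsiz;
	int *lastfrag;
{
	u_long mark;
	long len;

	if (readn(sock, (char *)&mark, (long)sizeof (mark)))
		return (-1);
	mark = ntohl(mark);
	*lastfrag = (mark & 0x80000000) != 0;
	len = mark & ~0x80000000;
	if (len > bufsiz || readn(sock, buf, len))
		return (-1);
	return (len);
}
#endif /* notdef */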
    595 
    596 /*
    597  * Implement receipt of reply on a socket.
    598  * We must search through the list of received datagrams matching them
    599  * with outstanding requests using the xid, until ours is found.
    600  */
    601 /* ARGSUSED */
    602 nfs_reply(nmp, myrep)
    603 	struct nfsmount *nmp;
    604 	struct nfsreq *myrep;
    605 {
    606 	register struct mbuf *m;
    607 	register struct nfsreq *rep;
    608 	register int error = 0;
    609 	u_long rxid;
    610 	struct mbuf *mp, *nam;
    611 	char *cp;
    612 	int cnt, xfer;
    613 
    614 	/*
    615 	 * Loop around until we get our own reply
    616 	 */
    617 	for (;;) {
    618 		/*
    619 		 * Lock against other receivers so that I don't get stuck in
    620 		 * sbwait() after someone else has received my reply for me.
    621 		 * Also necessary for connection based protocols to avoid
    622 		 * race conditions during a reconnect.
    623 		 */
    624 		nfs_solock(&nmp->nm_flag);
    625 		/* Already received, bye bye */
    626 		if (myrep->r_mrep != NULL) {
    627 			nfs_sounlock(&nmp->nm_flag);
    628 			return (0);
    629 		}
    630 		/*
    631 		 * Get the next Rpc reply off the socket
    632 		 */
    633 		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
    634 			nfs_sounlock(&nmp->nm_flag);
    635 
    636 			/*
    637 			 * Ignore routing errors on connectionless protocols??
    638 			 */
    639 			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
    640 				nmp->nm_so->so_error = 0;
    641 				continue;
    642 			}
    643 
    644 			/*
    645 			 * Otherwise cleanup and return a fatal error.
    646 			 */
    647 			if (myrep->r_flags & R_TIMING) {
    648 				myrep->r_flags &= ~R_TIMING;
    649 				nmp->nm_rtt = -1;
    650 			}
    651 			if (myrep->r_flags & R_SENT) {
    652 				myrep->r_flags &= ~R_SENT;
    653 				nmp->nm_sent--;
    654 			}
    655 			return (error);
    656 		}
    657 
    658 		/*
    659 		 * Get the xid and check that it is an rpc reply
    660 		 */
    661 		m = mp;
    662 		while (m && m->m_len == 0)
    663 			m = m->m_next;
    664 		if (m == NULL) {
    665 			nfsstats.rpcinvalid++;
    666 			m_freem(mp);
    667 			nfs_sounlock(&nmp->nm_flag);
    668 			continue;
    669 		}
    670 		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
    671 		/*
    672 		 * Loop through the request list to match up the reply
    673 		 * Iff no match, just drop the datagram
    674 		 */
    675 		m = mp;
    676 		rep = nfsreqh.r_next;
    677 		while (rep != &nfsreqh) {
    678 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
    679 				/* Found it.. */
    680 				rep->r_mrep = m;
    681 				/*
    682 				 * Update timing
    683 				 */
    684 				if (rep->r_flags & R_TIMING) {
    685 					nfs_updatetimer(rep->r_nmp);
    686 					rep->r_flags &= ~R_TIMING;
    687 					rep->r_nmp->nm_rtt = -1;
    688 				}
    689 				if (rep->r_flags & R_SENT) {
    690 					rep->r_flags &= ~R_SENT;
    691 					rep->r_nmp->nm_sent--;
    692 				}
    693 				break;
    694 			}
    695 			rep = rep->r_next;
    696 		}
    697 		nfs_sounlock(&nmp->nm_flag);
    698 		if (nam)
    699 			m_freem(nam);
    700 		/*
    701 		 * If not matched to a request, drop it.
    702 		 * If it's mine, get out.
    703 		 */
    704 		if (rep == &nfsreqh) {
    705 			nfsstats.rpcunexpected++;
    706 			m_freem(m);
    707 		} else if (rep == myrep)
    708 			return (0);
    709 	}
    710 }
    711 
    712 /*
    713  * nfs_request - goes something like this
    714  *	- fill in request struct
     715  *	- link it into the list
     716  *	- call nfs_send() for the first transmit
     717  *	- call nfs_receive() to get the reply
    718  *	- break down rpc header and return with nfs reply pointed to
    719  *	  by mrep or error
    720  * nb: always frees up mreq mbuf list
    721  */
    722 nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
    723 	struct vnode *vp;
    724 	struct mbuf *mreq;
    725 	u_long xid;
    726 	int procnum;
    727 	struct proc *procp;
    728 	int tryhard;
    729 	struct mount *mp;
    730 	struct mbuf **mrp;
    731 	struct mbuf **mdp;
    732 	caddr_t *dposp;
    733 {
    734 	register struct mbuf *m, *mrep;
    735 	register struct nfsreq *rep;
    736 	register u_long *tl;
    737 	register int len;
    738 	struct nfsmount *nmp;
    739 	struct mbuf *md;
    740 	struct nfsreq *reph;
    741 	caddr_t dpos;
    742 	char *cp2;
    743 	int t1;
    744 	int s, compressed;
    745 	int error = 0;
    746 
    747 	nmp = VFSTONFS(mp);
    748 	m = mreq;
    749 	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
    750 	rep->r_xid = xid;
    751 	rep->r_nmp = nmp;
    752 	rep->r_vp = vp;
    753 	rep->r_procp = procp;
    754 	if ((nmp->nm_flag & NFSMNT_SOFT) ||
    755 	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
    756 		rep->r_retry = nmp->nm_retry;
    757 	else
    758 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
    759 	rep->r_flags = rep->r_rexmit = 0;
    760 	/*
    761 	 * Three cases:
    762 	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
    763 	 * - idempotent requests on SOCK_DGRAM use 0
     764 	 * - reliable transports use NFS_RELIABLETIMEO
    765 	 *   Timeouts are still done on reliable transports to ensure detection
    766 	 *   of excessive connection delay.
    767 	 */
    768 	if (nmp->nm_sotype != SOCK_DGRAM)
    769 		rep->r_timerinit = -NFS_RELIABLETIMEO;
    770 	else if (nonidempotent[procnum])
    771 		rep->r_timerinit = -NFS_MINIDEMTIMEO;
    772 	else
    773 		rep->r_timerinit = 0;
    774 	rep->r_timer = rep->r_timerinit;
    775 	rep->r_mrep = NULL;
    776 	len = 0;
    777 	while (m) {
    778 		len += m->m_len;
    779 		m = m->m_next;
    780 	}
    781 	mreq->m_pkthdr.len = len;
    782 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
    783 	compressed = 0;
    784 	m = mreq;
    785 	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
    786 		mreq = nfs_compress(mreq);
    787 		if (mreq != m) {
    788 			len = mreq->m_pkthdr.len;
    789 			compressed++;
    790 		}
    791 	}
    792 	/*
    793 	 * For non-atomic protocols, insert a Sun RPC Record Mark.
    794 	 */
    795 	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
    796 		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
    797 		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
    798 	}
    799 	rep->r_mreq = mreq;
    800 
    801 	/*
    802 	 * Do the client side RPC.
    803 	 */
    804 	nfsstats.rpcrequests++;
    805 	/*
    806 	 * Chain request into list of outstanding requests. Be sure
    807 	 * to put it LAST so timer finds oldest requests first.
    808 	 */
    809 	s = splnet();
    810 	reph = &nfsreqh;
    811 	reph->r_prev->r_next = rep;
    812 	rep->r_prev = reph->r_prev;
    813 	reph->r_prev = rep;
    814 	rep->r_next = reph;
    815 	/*
    816 	 * If backing off another request or avoiding congestion, don't
    817 	 * send this one now but let timer do it. If not timing a request,
    818 	 * do it now.
    819 	 */
    820 	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
    821 	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
    822 		nmp->nm_sent++;
    823 		rep->r_flags |= R_SENT;
    824 		if (nmp->nm_rtt == -1) {
    825 			nmp->nm_rtt = 0;
    826 			rep->r_flags |= R_TIMING;
    827 		}
    828 		splx(s);
    829 		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
    830 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    831 			nfs_solock(&nmp->nm_flag);
    832 		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
    833 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    834 			nfs_sounlock(&nmp->nm_flag);
    835 		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
    836 			nmp->nm_so->so_error = error = 0;
    837 	} else
    838 		splx(s);
    839 
    840 	/*
    841 	 * Wait for the reply from our send or the timer's.
    842 	 */
    843 	if (!error)
    844 		error = nfs_reply(nmp, rep);
    845 
    846 	/*
    847 	 * RPC done, unlink the request.
    848 	 */
    849 	s = splnet();
    850 	rep->r_prev->r_next = rep->r_next;
    851 	rep->r_next->r_prev = rep->r_prev;
    852 	splx(s);
    853 
    854 	/*
     855 	 * If there was a successful reply and a tprintf msg was logged,
     856 	 * tprintf a response.
    857 	 */
    858 	if (!error && (rep->r_flags & R_TPRINTFMSG))
    859 		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    860 		    "is alive again");
    861 	m_freem(rep->r_mreq);
    862 	mrep = rep->r_mrep;
    863 	FREE((caddr_t)rep, M_NFSREQ);
    864 	if (error)
    865 		return (error);
    866 
    867 	if (compressed)
    868 		mrep = nfs_uncompress(mrep);
    869 	md = mrep;
    870 	/*
    871 	 * break down the rpc header and check if ok
    872 	 */
    873 	dpos = mtod(md, caddr_t);
    874 	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
    875 	tl += 2;
    876 	if (*tl++ == rpc_msgdenied) {
    877 		if (*tl == rpc_mismatch)
    878 			error = EOPNOTSUPP;
    879 		else
    880 			error = EACCES;
    881 		m_freem(mrep);
    882 		return (error);
    883 	}
    884 	/*
    885 	 * skip over the auth_verf, someday we may want to cache auth_short's
    886 	 * for nfs_reqhead(), but for now just dump it
    887 	 */
    888 	if (*++tl != 0) {
    889 		len = nfsm_rndup(fxdr_unsigned(long, *tl));
    890 		nfsm_adv(len);
    891 	}
    892 	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    893 	/* 0 == ok */
    894 	if (*tl == 0) {
    895 		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    896 		if (*tl != 0) {
    897 			error = fxdr_unsigned(int, *tl);
    898 			m_freem(mrep);
    899 			return (error);
    900 		}
    901 		*mrp = mrep;
    902 		*mdp = md;
    903 		*dposp = dpos;
    904 		return (0);
    905 	}
    906 	m_freem(mrep);
    907 	return (EPROTONOSUPPORT);
    908 nfsmout:
    909 	return (error);
    910 }
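/*
 * Worked example of the Record Mark prepended in nfs_request() above for
 * non-atomic (stream) transports: a 132 byte request gets the 4 byte word
 * htonl(0x80000000 | 132) == htonl(0x80000084) stuck on the front, where
 * the high bit says "last (and here only) fragment" and the low 31 bits
 * carry the fragment length in bytes.
 */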
    911 
    912 /*
    913  * Get a request for the server main loop
     914  * - receive a request via nfs_receive()
    915  * - verify it
    916  * - fill in the cred struct.
    917  */
    918 nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
    919 	msk, mtch, wascomp, repstat)				/* 08 Aug 92*/
    920 	struct socket *so;
    921 	u_long prog;
    922 	u_long vers;
    923 	int maxproc;
    924 	struct mbuf **nam;
    925 	struct mbuf **mrp;
    926 	struct mbuf **mdp;
    927 	caddr_t *dposp;
    928 	u_long *retxid;
    929 	u_long *procnum;
    930 	register struct ucred *cr;
    931 	struct mbuf *msk, *mtch;
    932 	int *wascomp, *repstat;					/* 08 Aug 92*/
    933 {
    934 	register int i;
    935 	register u_long *tl;
    936 	register long t1;
    937 	caddr_t dpos, cp2;
    938 	int error = 0;
    939 	struct mbuf *mrep, *md;
    940 	int len;
    941 
    942 	*repstat = 0;						/* 08 Aug 92*/
    943 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    944 		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    945 	} else {
    946 		mrep = (struct mbuf *)0;
    947 		do {
    948 			if (mrep) {
    949 				m_freem(*nam);
    950 				m_freem(mrep);
    951 			}
    952 			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    953 		} while (!error && nfs_badnam(*nam, msk, mtch));
    954 	}
    955 	if (error)
    956 		return (error);
    957 	md = mrep;
    958 	mrep = nfs_uncompress(mrep);
    959 	if (mrep != md) {
    960 		*wascomp = 1;
    961 		md = mrep;
    962 	} else
    963 		*wascomp = 0;
    964 	dpos = mtod(mrep, caddr_t);
    965 	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
    966 	*retxid = *tl++;
    967 	if (*tl++ != rpc_call || *tl++ != rpc_vers) {		/* 08 Aug 92*/
    968 		*mrp = mrep;
    969 		*procnum = NFSPROC_NOOP;
    970 		*repstat = ERPCMISMATCH;
    971 		return (0);
    972 	}
    973 	if (*tl++ != prog) {
    974 		*mrp = mrep;					/* 08 Aug 92*/
    975 		*procnum = NFSPROC_NOOP;
    976 		*repstat = EPROGUNAVAIL;
    977 		return (0);
    978 	}
    979 	if (*tl++ != vers) {
    980 		*mrp = mrep;					/* 08 Aug 92*/
    981 		*procnum = NFSPROC_NOOP;
    982 		*repstat = EPROGMISMATCH;
    983 		return (0);
    984 	}
    985 	*procnum = fxdr_unsigned(u_long, *tl++);
    986 	if (*procnum == NFSPROC_NULL) {
    987 		*mrp = mrep;
    988 		return (0);
    989 	}
    990 	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
    991 		*mrp = mrep;					/* 08 Aug 92*/
    992 		*procnum = NFSPROC_NOOP;
    993 		*repstat = EPROCUNAVAIL;
    994 		return (0);
    995 	}
    996 	len = fxdr_unsigned(int, *tl++);
    997 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
    998 		m_freem(mrep);
    999 		return (EBADRPC);
   1000 	}
   1001 	len = fxdr_unsigned(int, *++tl);
   1002 	if (len < 0 || len > NFS_MAXNAMLEN) {
   1003 		m_freem(mrep);
   1004 		return (EBADRPC);
   1005 	}
   1006 	nfsm_adv(nfsm_rndup(len));
   1007 	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
   1008 	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
   1009 	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
   1010 	len = fxdr_unsigned(int, *tl);
   1011 	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
   1012 		m_freem(mrep);
   1013 		return (EBADRPC);
   1014 	}
   1015 	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
   1016 	for (i = 1; i <= len; i++)
   1017 		if (i < NGROUPS)
   1018 			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
   1019 		else
   1020 			tl++;
   1021 	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
   1022 	/*
    1023 	 * Do we have any use for the verifier?
    1024 	 * According to the "Remote Procedure Call Protocol Spec." it
    1025 	 * should be AUTH_NULL, but some clients make it AUTH_UNIX.
    1026 	 * For now, just skip over it.
   1027 	 */
   1028 	len = fxdr_unsigned(int, *++tl);
   1029 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
   1030 		m_freem(mrep);
   1031 		return (EBADRPC);
   1032 	}
   1033 	if (len > 0)
   1034 		nfsm_adv(nfsm_rndup(len));
   1035 	*mrp = mrep;
   1036 	*mdp = md;
   1037 	*dposp = dpos;
   1038 	return (0);
   1039 nfsmout:
   1040 	return (error);
   1041 }
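/*
 * For reference, the 10 XDR words disected at the top of nfs_getreq()
 * above are: xid, direction (CALL), rpc version (2), program, version,
 * procedure, cred flavor (AUTH_UNIX), cred length, auth_unix stamp and
 * machine name length; the machine name, uid, gid and group list are then
 * picked up with the nfsm_adv()/nfsm_disect() calls that follow.
 */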
   1042 
   1043 /*
   1044  * Generate the rpc reply header
   1045  * siz arg. is used to decide if adding a cluster is worthwhile
   1046  */
   1047 nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
   1048 	int siz;
   1049 	u_long retxid;
   1050 	int err;
   1051 	struct mbuf **mrq;
   1052 	struct mbuf **mbp;
   1053 	caddr_t *bposp;
   1054 {
   1055 	register u_long *tl;
   1056 	register long t1;
   1057 	caddr_t bpos;
   1058 	struct mbuf *mreq, *mb, *mb2;
   1059 
   1060 	NFSMGETHDR(mreq);
   1061 	mb = mreq;
   1062 	if ((siz+RPC_REPLYSIZ) > MHLEN)
   1063 		MCLGET(mreq, M_WAIT);
   1064 	tl = mtod(mreq, u_long *);
   1065 	mreq->m_len = 6*NFSX_UNSIGNED;
   1066 	bpos = ((caddr_t)tl)+mreq->m_len;
   1067 	*tl++ = retxid;
   1068 	*tl++ = rpc_reply;
   1069 	if (err == ERPCMISMATCH) {
   1070 		*tl++ = rpc_msgdenied;
   1071 		*tl++ = rpc_mismatch;
   1072 		*tl++ = txdr_unsigned(2);
   1073 		*tl = txdr_unsigned(2);
   1074 	} else {
   1075 		*tl++ = rpc_msgaccepted;
   1076 		*tl++ = 0;
   1077 		*tl++ = 0;
   1078 		switch (err) {
   1079 		case EPROGUNAVAIL:
   1080 			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
   1081 			break;
   1082 		case EPROGMISMATCH:
   1083 			*tl = txdr_unsigned(RPC_PROGMISMATCH);
   1084 			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
   1085 			*tl++ = txdr_unsigned(2);
   1086 			*tl = txdr_unsigned(2);	/* someday 3 */
   1087 			break;
   1088 		case EPROCUNAVAIL:
   1089 			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
   1090 			break;
   1091 		default:
   1092 			*tl = 0;
   1093 			if (err != VNOVAL) {
   1094 				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
   1095 				*tl = txdr_unsigned(err);
   1096 			}
   1097 			break;
   1098 		};
   1099 	}
   1100 	*mrq = mreq;
   1101 	*mbp = mb;
   1102 	*bposp = bpos;
   1103 	if (err != 0 && err != VNOVAL)
   1104 		nfsstats.srvrpc_errs++;
   1105 	return (0);
   1106 }
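/*
 * For reference, the six XDR words built above are: xid, REPLY, and then
 * either MSG_DENIED, RPC_MISMATCH, low version (2), high version (2) for
 * an rpc version mismatch, or MSG_ACCEPTED, verifier flavor (AUTH_NULL),
 * verifier length (0), accept status.  For RPC_PROGMISMATCH and for nfs
 * errors additional words are appended with nfsm_build() as shown.
 */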
   1107 
   1108 /*
   1109  * Nfs timer routine
    1110  * Scan the nfsreq list and retransmit any requests that have timed out.
   1111  * To avoid retransmission attempts on STREAM sockets (in the future) make
   1112  * sure to set the r_retry field to 0 (implies nm_retry == 0).
   1113  */
   1114 nfs_timer()
   1115 {
   1116 	register struct nfsreq *rep;
   1117 	register struct mbuf *m;
   1118 	register struct socket *so;
   1119 	register struct nfsmount *nmp;
   1120 	int s, error;
   1121 
   1122 	s = splnet();
   1123 	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
   1124 		nmp = rep->r_nmp;
   1125 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
   1126 		    (so = nmp->nm_so) == NULL)
   1127 			continue;
   1128 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
   1129 			rep->r_flags |= R_SOFTTERM;
   1130 			continue;
   1131 		}
   1132 		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
   1133 			nmp->nm_rtt++;
   1134 		/* If not timed out */
   1135 		if (++rep->r_timer < nmp->nm_rto)
   1136 			continue;
   1137 		/* Do backoff and save new timeout in mount */
   1138 		if (rep->r_flags & R_TIMING) {
   1139 			nfs_backofftimer(nmp);
   1140 			rep->r_flags &= ~R_TIMING;
   1141 			nmp->nm_rtt = -1;
   1142 		}
   1143 		if (rep->r_flags & R_SENT) {
   1144 			rep->r_flags &= ~R_SENT;
   1145 			nmp->nm_sent--;
   1146 		}
   1147 
   1148 		/*
   1149 		 * Check for too many retries on soft mount.
   1150 		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
   1151 		 */
   1152 		if (++rep->r_rexmit > NFS_MAXREXMIT)
   1153 			rep->r_rexmit = NFS_MAXREXMIT;
   1154 
   1155 		/*
   1156 		 * Check for server not responding
   1157 		 */
   1158 		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
   1159 		     rep->r_rexmit > NFS_FISHY) {
   1160 			nfs_msg(rep->r_procp,
   1161 			    nmp->nm_mountp->mnt_stat.f_mntfromname,
   1162 			    "not responding");
   1163 			rep->r_flags |= R_TPRINTFMSG;
   1164 		}
   1165 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
   1166 			nfsstats.rpctimeouts++;
   1167 			rep->r_flags |= R_SOFTTERM;
   1168 			continue;
   1169 		}
   1170 		if (nmp->nm_sotype != SOCK_DGRAM)
   1171 			continue;
   1172 
   1173 		/*
   1174 		 * If there is enough space and the window allows..
   1175 		 *	Resend it
   1176 		 */
   1177 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
   1178 		       nmp->nm_sent < nmp->nm_window &&
   1179 		       (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
   1180 			nfsstats.rpcretries++;
   1181 			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
   1182 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1183 			    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
   1184 			else
   1185 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1186 			    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
   1187 			if (error) {
   1188 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
   1189 					so->so_error = 0;
   1190 			} else {
   1191 				/*
   1192 				 * We need to time the request even though we
   1193 				 * are retransmitting.
   1194 				 */
   1195 				nmp->nm_rtt = 0;
   1196 				nmp->nm_sent++;
   1197 				rep->r_flags |= (R_SENT|R_TIMING);
   1198 				rep->r_timer = rep->r_timerinit;
   1199 			}
   1200 		}
   1201 	}
   1202 	splx(s);
   1203 	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
   1204 }
   1205 
   1206 /*
   1207  * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
   1208  * used here. The timer state is held in the nfsmount structure and
   1209  * a single request is used to clock the response. When successful
   1210  * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
   1211  * is done by nfs_backofftimer. We also log failure messages in these
   1212  * routines.
   1213  *
   1214  * Congestion variables are held in the nfshost structure which
   1215  * is referenced by nfsmounts and shared per-server. This separation
   1216  * makes it possible to do per-mount timing which allows varying disk
   1217  * access times to be dealt with, while preserving a network oriented
   1218  * congestion control scheme.
   1219  *
   1220  * The windowing implements the Jacobson/Karels slowstart algorithm
   1221  * with adjusted scaling factors. We start with one request, then send
   1222  * 4 more after each success until the ssthresh limit is reached, then
   1223  * we increment at a rate proportional to the window. On failure, we
   1224  * remember 3/4 the current window and clamp the send limit to 1. Note
   1225  * ICMP source quench is not reflected in so->so_error so we ignore that
   1226  * for now.
   1227  *
   1228  * NFS behaves much more like a transport protocol with these changes,
   1229  * shedding the teenage pedal-to-the-metal tendencies of "other"
   1230  * implementations.
   1231  *
   1232  * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
   1233  */
   1234 
   1235 /*
   1236  * The TCP algorithm was not forgiving enough. Because the NFS server
   1237  * responds only after performing lookups/diskio/etc, we have to be
   1238  * more prepared to accept a spiky variance. The TCP algorithm is:
   1239  * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
   1240  */
   1241 #define NFS_RTO(nmp)	(((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
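/*
 * Worked example: with nm_srtt == 32 and nm_rttvar == 6, NFS_RTO gives
 * (32 >> 3) + 6 == 10 ticks where the TCP-style formula above would give
 * ((32 >> 2) + 6) >> 1 == 7; the variance term is weighted more heavily
 * to ride out the spiky response times described in the comment.
 */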
   1242 
   1243 nfs_updatetimer(nmp)
   1244 	register struct nfsmount *nmp;
   1245 {
   1246 
   1247 	/* If retransmitted, clear and return */
   1248 	if (nmp->nm_rexmit || nmp->nm_currexmit) {
   1249 		nmp->nm_rexmit = nmp->nm_currexmit = 0;
   1250 		return;
   1251 	}
   1252 	/* If have a measurement, do smoothing */
   1253 	if (nmp->nm_srtt) {
   1254 		register short delta;
   1255 		delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
   1256 		if ((nmp->nm_srtt += delta) <= 0)
   1257 			nmp->nm_srtt = 1;
   1258 		if (delta < 0)
   1259 			delta = -delta;
   1260 		delta -= (nmp->nm_rttvar >> 2);
   1261 		if ((nmp->nm_rttvar += delta) <= 0)
   1262 			nmp->nm_rttvar = 1;
   1263 	/* Else initialize */
   1264 	} else {
   1265 		nmp->nm_rttvar = nmp->nm_rtt << 1;
   1266 		if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
   1267 		nmp->nm_srtt = nmp->nm_rttvar << 2;
   1268 	}
   1269 	/* Compute new Retransmission TimeOut and clip */
   1270 	nmp->nm_rto = NFS_RTO(nmp);
   1271 	if (nmp->nm_rto < NFS_MINTIMEO)
   1272 		nmp->nm_rto = NFS_MINTIMEO;
   1273 	else if (nmp->nm_rto > NFS_MAXTIMEO)
   1274 		nmp->nm_rto = NFS_MAXTIMEO;
   1275 
   1276 	/* Update window estimate */
   1277 	if (nmp->nm_window < nmp->nm_ssthresh)	/* quickly */
   1278 		nmp->nm_window += 4;
   1279 	else {						/* slowly */
   1280 		register long incr = ++nmp->nm_winext;
   1281 		incr = (incr * incr) / nmp->nm_window;
   1282 		if (incr > 0) {
   1283 			nmp->nm_winext = 0;
   1284 			++nmp->nm_window;
   1285 		}
   1286 	}
   1287 	if (nmp->nm_window > NFS_MAXWINDOW)
   1288 		nmp->nm_window = NFS_MAXWINDOW;
   1289 }
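/*
 * Worked example of the smoothing above: with nm_srtt == 32, nm_rttvar == 6
 * and a measured nm_rtt of 7,
 *	delta = 7 - (32 >> 3) = 3,	so nm_srtt becomes 35
 *	delta = 3 - (6 >> 2) = 2,	so nm_rttvar becomes 8
 * and NFS_RTO comes out as (35 >> 3) + 8 = 12 before clipping to the
 * NFS_MINTIMEO..NFS_MAXTIMEO range.  nm_srtt is thus kept scaled by 8 and
 * nm_rttvar by 4, as in the TCP code this is modelled on.
 */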
   1290 
   1291 nfs_backofftimer(nmp)
   1292 	register struct nfsmount *nmp;
   1293 {
   1294 	register unsigned long newrto;
   1295 
   1296 	/* Clip shift count */
   1297 	if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
   1298 		nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
   1299 	/* Back off RTO exponentially */
   1300 	newrto = NFS_RTO(nmp);
   1301 	newrto <<= (nmp->nm_rexmit - 1);
   1302 	if (newrto == 0 || newrto > NFS_MAXTIMEO)
   1303 		newrto = NFS_MAXTIMEO;
   1304 	nmp->nm_rto = newrto;
   1305 
   1306 	/* If too many retries, message, assume a bogus RTT and re-measure */
   1307 	if (nmp->nm_currexmit < nmp->nm_rexmit) {
   1308 		nmp->nm_currexmit = nmp->nm_rexmit;
   1309 		if (nmp->nm_currexmit >= nfsrexmtthresh) {
   1310 			if (nmp->nm_currexmit == nfsrexmtthresh) {
   1311 				nmp->nm_rttvar += (nmp->nm_srtt >> 2);
   1312 				nmp->nm_srtt = 0;
   1313 			}
   1314 		}
   1315 	}
   1316 	/* Close down window but remember this point (3/4 current) for later */
   1317 	nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
   1318 	nmp->nm_window = 1;
   1319 	nmp->nm_winext = 0;
   1320 }
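/*
 * Worked example of the backoff above: on the third consecutive timeout
 * nm_rexmit is 3, so the new rto is NFS_RTO(nmp) << 2 (quadrupled, clipped
 * at NFS_MAXTIMEO), the send window is clamped to 1, and nm_ssthresh keeps
 * 3/4 of the old window (a window of 16, for example, leaves nm_ssthresh
 * at 12 for the subsequent slowstart).
 */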
   1321 
   1322 /*
   1323  * Test for a termination signal pending on procp.
   1324  * This is used for NFSMNT_INT mounts.
   1325  */
   1326 nfs_sigintr(p)
   1327 	register struct proc *p;
   1328 {
   1329 	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
   1330 	    NFSINT_SIGMASK))
   1331 		return (1);
   1332 	else
   1333 		return (0);
   1334 }
   1335 
   1336 nfs_msg(p, server, msg)
   1337 	struct proc *p;
   1338 	char *server, *msg;
   1339 {
   1340 	tpr_t tpr;
   1341 
   1342 	if (p)
   1343 		tpr = tprintf_open(p);
   1344 	else
   1345 		tpr = NULL;
   1346 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1347 	tprintf_close(tpr);
   1348 }
   1349 
   1350 /*
   1351  * Lock a socket against others.
   1352  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1353  * and also to avoid race conditions between the processes with nfs requests
   1354  * in progress when a reconnect is necessary.
   1355  */
   1356 nfs_solock(flagp)
   1357 	register int *flagp;
   1358 {
   1359 
   1360 	while (*flagp & NFSMNT_SCKLOCK) {
   1361 		*flagp |= NFSMNT_WANTSCK;
   1362 		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
   1363 	}
   1364 	*flagp |= NFSMNT_SCKLOCK;
   1365 }
   1366 
   1367 /*
   1368  * Unlock the stream socket for others.
   1369  */
   1370 nfs_sounlock(flagp)
   1371 	register int *flagp;
   1372 {
   1373 
   1374 	if ((*flagp & NFSMNT_SCKLOCK) == 0)
   1375 		panic("nfs sounlock");
   1376 	*flagp &= ~NFSMNT_SCKLOCK;
   1377 	if (*flagp & NFSMNT_WANTSCK) {
   1378 		*flagp &= ~NFSMNT_WANTSCK;
   1379 		wakeup((caddr_t)flagp);
   1380 	}
   1381 }
   1382 
   1383 /*
   1384  * This function compares two net addresses by family and returns TRUE
   1385  * if they are the same.
   1386  * If there is any doubt, return FALSE.
   1387  */
   1388 nfs_netaddr_match(nam1, nam2)
   1389 	struct mbuf *nam1, *nam2;
   1390 {
   1391 	register struct sockaddr *saddr1, *saddr2;
   1392 
   1393 	saddr1 = mtod(nam1, struct sockaddr *);
   1394 	saddr2 = mtod(nam2, struct sockaddr *);
   1395 	if (saddr1->sa_family != saddr2->sa_family)
   1396 		return (0);
   1397 
   1398 	/*
   1399 	 * Must do each address family separately since unused fields
   1400 	 * are undefined values and not always zeroed.
   1401 	 */
   1402 	switch (saddr1->sa_family) {
   1403 	case AF_INET:
   1404 		if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
   1405 		    ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
   1406 			return (1);
   1407 		break;
   1408 	default:
   1409 		break;
   1410 	};
   1411 	return (0);
   1412 }
   1413 
   1414 /*
   1415  * Check the hostname fields for nfsd's mask and match fields.
   1416  * By address family:
   1417  * - Bitwise AND the mask with the host address field
   1418  * - Compare for == with match
   1419  * return TRUE if not equal
   1420  */
   1421 nfs_badnam(nam, msk, mtch)
   1422 	register struct mbuf *nam, *msk, *mtch;
   1423 {
   1424 	switch (mtod(nam, struct sockaddr *)->sa_family) {
   1425 	case AF_INET:
   1426 		return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
   1427 			 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
   1428 			 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
   1429 	default:
    1430 		printf("nfs_badnam: unknown sa_family\n");
   1431 		return (0);
   1432 	};
   1433 }
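/*
 * Worked example with hypothetical addresses: given an nfsd mask of
 * 255.255.255.0 and a match address of 192.168.1.0, a request from
 * 192.168.1.20 is accepted (192.168.1.20 & 255.255.255.0 == 192.168.1.0)
 * while one from 10.0.0.5 is rejected.  All three values are in network
 * byte order, so the AND and compare need no byte swapping.
 */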
   1434