      1 /*
      2  * Copyright (c) 1989, 1991 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Rick Macklem at The University of Guelph.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	from: @(#)nfs_socket.c	7.23 (Berkeley) 4/20/91
     37  *	$Id: nfs_socket.c,v 1.8 1993/09/07 15:41:41 ws Exp $
     38  */
     39 
     40 /*
     41  * Socket operations for use by nfs
     42  */
     43 
     44 #include "param.h"
     45 #include "systm.h"
     46 #include "proc.h"
     47 #include "mount.h"
     48 #include "kernel.h"
     49 #include "malloc.h"
     50 #include "mbuf.h"
     51 #include "namei.h"
     52 #include "vnode.h"
     53 #include "domain.h"
     54 #include "protosw.h"
     55 #include "socket.h"
     56 #include "socketvar.h"
     57 #include "syslog.h"
     58 #include "tprintf.h"
     59 #include "../netinet/in.h"
     60 #include "../netinet/tcp.h"
     61 
     62 #include "rpcv2.h"
     63 #include "nfsv2.h"
     64 #include "nfs.h"
     65 #include "xdr_subs.h"
     66 #include "nfsm_subs.h"
     67 #include "nfsmount.h"
     68 
     69 #define	TRUE	1
     70 #define	FALSE	0
     71 
     72 /*
     73  * External data, mostly RPC constants in XDR form
     74  */
     75 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
     76 	rpc_msgaccepted, rpc_call;
     77 extern u_long nfs_prog, nfs_vers;
     78 /* Maybe these should be bits in a u_long ?? */
     79 /*
     80  * Static array that defines which nfs rpc's are nonidempotent
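         * (indexed by procedure number; assuming the standard NFSv2 (RFC 1094)
         *  numbering, the TRUE entries are the state-modifying calls: SETATTR,
         *  WRITE, CREATE, REMOVE, RENAME, LINK, SYMLINK, MKDIR and RMDIR)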
     81  */
     82 int nonidempotent[NFS_NPROCS] = {
     83 	FALSE,
     84 	FALSE,
     85 	TRUE,
     86 	FALSE,
     87 	FALSE,
     88 	FALSE,
     89 	FALSE,
     90 	FALSE,
     91 	TRUE,
     92 	TRUE,
     93 	TRUE,
     94 	TRUE,
     95 	TRUE,
     96 	TRUE,
     97 	TRUE,
     98 	TRUE,
     99 	FALSE,
    100 	FALSE,
    101 };
    102 static int compressrequest[NFS_NPROCS] = {
    103 	FALSE,
    104 	TRUE,
    105 	TRUE,
    106 	FALSE,
    107 	TRUE,
    108 	TRUE,
    109 	TRUE,
    110 	FALSE,
    111 	FALSE,
    112 	TRUE,
    113 	TRUE,
    114 	TRUE,
    115 	TRUE,
    116 	TRUE,
    117 	TRUE,
    118 	TRUE,
    119 	TRUE,
    120 	TRUE,
    121 };
    122 int	nfs_sbwait();
    123 void	nfs_disconnect();
    124 struct mbuf *nfs_compress(), *nfs_uncompress();
    125 
    126 
    127 struct nfsreq nfsreqh;
    128 int nfsrexmtthresh = NFS_FISHY;
    129 int nfs_tcpnodelay = 1;
    130 
    131 /*
    132  * Initialize sockets and congestion for a new NFS connection.
     133  * We do not free the sockaddr on error.
    134  */
    135 nfs_connect(nmp)
    136 	register struct nfsmount *nmp;
    137 {
    138 	register struct socket *so;
    139 	struct sockaddr *saddr;					/* 08 Sep 92*/
    140 	int s, error, bufsize;
    141 	struct mbuf *m;
    142 	struct sockaddr_in *sin;				/* 08 Sep 92*/
    143 	u_short tport;						/* 08 Sep 92*/
    144 
    145 	nmp->nm_so = (struct socket *)0;
    146 	saddr = mtod(nmp->nm_nam, struct sockaddr *);		/* 08 Sep 92*/
    147 	if (error = socreate(saddr->sa_family,			/* 08 Sep 92*/
    148 		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
    149 		goto bad;
    150 	so = nmp->nm_so;
    151 	nmp->nm_soflags = so->so_proto->pr_flags;
    152 
    153 	/*
    154 	 * 08 Sep 92
    155 	 *
    156 	 * Some servers require that the client port be a reserved port number.
    157 	 */
    158 	if (saddr->sa_family == AF_INET) {
    159 		MGET(m, M_WAIT, MT_SONAME);
    160 		sin = mtod(m, struct sockaddr_in *);
    161 		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
    162 		sin->sin_family = AF_INET;
    163 		sin->sin_addr.s_addr = INADDR_ANY;
    164 		tport = IPPORT_RESERVED - 1;
    165 		sin->sin_port = htons(tport);
    166 		while (sobind(so, m) == EADDRINUSE &&
    167 		       --tport > IPPORT_RESERVED / 2)
    168 			sin->sin_port = htons(tport);
    169 		m_freem(m);
    170 	}
    171 
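        	/*
        	 * Reserve socket buffer space for about four maximal-sized requests
        	 * and replies; stream transports also need room for the 4 byte RPC
        	 * record mark on each request.
        	 */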
    172 	if (nmp->nm_sotype == SOCK_DGRAM)
    173 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
    174 		    NFS_MAXPACKET);
    175 	else
    176 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
    177 		    NFS_MAXPACKET + sizeof(u_long));
    178 	if (error = soreserve(so, bufsize, bufsize))
    179 		goto bad;
    180 
    181 	/*
    182 	 * Protocols that do not require connections may be optionally left
    183 	 * unconnected for servers that reply from a port other than NFS_PORT.
    184 	 */
    185 	if (nmp->nm_flag & NFSMNT_NOCONN) {
    186 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
    187 			error = ENOTCONN;
    188 			goto bad;
    189 		}
    190 	} else {
    191 		if (error = soconnect(so, nmp->nm_nam))
    192 			goto bad;
    193 
    194 		/*
    195 		 * Wait for the connection to complete. Cribbed from the
    196 		 * connect system call but with the wait at negative prio.
    197 		 */
    198 		s = splnet();
    199 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
    200 			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
    201 		splx(s);
    202 		if (so->so_error) {
    203 			error = so->so_error;
    204 			goto bad;
    205 		}
    206 	}
    207 	if (nmp->nm_sotype == SOCK_DGRAM) {
    208 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    209 			so->so_rcv.sb_timeo = (5 * hz);
    210 			so->so_snd.sb_timeo = (5 * hz);
    211 		} else {
    212 			so->so_rcv.sb_timeo = 0;
    213 			so->so_snd.sb_timeo = 0;
    214 		}
    215 		nmp->nm_rto = NFS_TIMEO;
    216 	} else {
    217 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    218 			so->so_rcv.sb_timeo = (5 * hz);
    219 			so->so_snd.sb_timeo = (5 * hz);
    220 		} else {
    221 			so->so_rcv.sb_timeo = 0;
    222 			so->so_snd.sb_timeo = 0;
    223 		}
    224 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    225 			MGET(m, M_WAIT, MT_SOOPTS);
    226 			*mtod(m, int *) = 1;
    227 			m->m_len = sizeof(int);
    228 			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
    229 		}
    230 		if (so->so_proto->pr_domain->dom_family == AF_INET &&
    231 		    so->so_proto->pr_protocol == IPPROTO_TCP &&
    232 		    nfs_tcpnodelay) {
    233 			MGET(m, M_WAIT, MT_SOOPTS);
    234 			*mtod(m, int *) = 1;
    235 			m->m_len = sizeof(int);
    236 			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
    237 		}
    238 		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
    239 	}
    240 	so->so_rcv.sb_flags |= SB_NOINTR;
    241 	so->so_snd.sb_flags |= SB_NOINTR;
    242 
    243 	/* Initialize other non-zero congestion variables */
    244 	nmp->nm_window = 2;			/* Initial send window */
    245 	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
    246 	nmp->nm_rttvar = nmp->nm_rto << 1;
    247 	nmp->nm_sent = 0;
    248 	nmp->nm_currexmit = 0;
    249 	return (0);
    250 
    251 bad:
    252 	nfs_disconnect(nmp);
    253 	return (error);
    254 }
    255 
    256 /*
    257  * Reconnect routine:
    258  * Called when a connection is broken on a reliable protocol.
    259  * - clean up the old socket
    260  * - nfs_connect() again
    261  * - set R_MUSTRESEND for all outstanding requests on mount point
    262  * If this fails the mount point is DEAD!
    263  * nb: Must be called with the nfs_solock() set on the mount point.
    264  */
    265 nfs_reconnect(rep, nmp)
    266 	register struct nfsreq *rep;
    267 	register struct nfsmount *nmp;
    268 {
    269 	register struct nfsreq *rp;
    270 	int error;
    271 
    272 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    273 	    "trying reconnect");
    274 	while (error = nfs_connect(nmp)) {
    275 #ifdef lint
    276 		error = error;
    277 #endif /* lint */
    278 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
    279 			return (EINTR);
    280 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
    281 	}
    282 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    283 	    "reconnected");
    284 
    285 	/*
    286 	 * Loop through outstanding request list and fix up all requests
    287 	 * on old socket.
    288 	 */
    289 	rp = nfsreqh.r_next;
    290 	while (rp != &nfsreqh) {
    291 		if (rp->r_nmp == nmp)
    292 			rp->r_flags |= R_MUSTRESEND;
    293 		rp = rp->r_next;
    294 	}
    295 	return (0);
    296 }
    297 
    298 /*
    299  * NFS disconnect. Clean up and unlink.
    300  */
    301 void
    302 nfs_disconnect(nmp)
    303 	register struct nfsmount *nmp;
    304 {
    305 	register struct socket *so;
    306 
    307 	if (nmp->nm_so) {
    308 		so = nmp->nm_so;
    309 		nmp->nm_so = (struct socket *)0;
    310 		soshutdown(so, 2);
    311 		soclose(so);
    312 	}
    313 }
    314 
    315 /*
    316  * This is the nfs send routine. For connection based socket types, it
    317  * must be called with an nfs_solock() on the socket.
    318  * "rep == NULL" indicates that it has been called from a server.
    319  */
    320 nfs_send(so, nam, top, rep)
    321 	register struct socket *so;
    322 	struct mbuf *nam;
    323 	register struct mbuf *top;
    324 	struct nfsreq *rep;
    325 {
    326 	struct mbuf *sendnam;
    327 	int error, soflags;
    328 
    329 	if (rep) {
    330 		if (rep->r_flags & R_SOFTTERM) {
    331 			m_freem(top);
    332 			return (EINTR);
    333 		}
    334 		if (rep->r_nmp->nm_so == NULL &&
    335 		    (error = nfs_reconnect(rep, rep->r_nmp)))
    336 			return (error);
    337 		rep->r_flags &= ~R_MUSTRESEND;
    338 		so = rep->r_nmp->nm_so;
    339 		soflags = rep->r_nmp->nm_soflags;
    340 	} else
    341 		soflags = so->so_proto->pr_flags;
    342 	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
    343 		sendnam = (struct mbuf *)0;
    344 	else
    345 		sendnam = nam;
    346 
    347 	error = sosend(so, sendnam, (struct uio *)0, top,
    348 		(struct mbuf *)0, 0);
    349 	if (error == EWOULDBLOCK && rep) {
    350 		if (rep->r_flags & R_SOFTTERM)
    351 			error = EINTR;
    352 		else {
    353 			rep->r_flags |= R_MUSTRESEND;
    354 			error = 0;
    355 		}
    356 	}
    357 	/*
    358 	 * Ignore socket errors??
    359 	 */
    360 	if (error && error != EINTR && error != ERESTART)
    361 		error = 0;
    362 	return (error);
    363 }
    364 
    365 /*
    366  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    367  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    368  * Mark and consolidate the data into a new mbuf list.
    369  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    370  *     small mbufs.
    371  * For SOCK_STREAM we must be very careful to read an entire record once
    372  * we have read any of it, even if the system call has been interrupted.
    373  */
    374 nfs_receive(so, aname, mp, rep)
    375 	register struct socket *so;
    376 	struct mbuf **aname;
    377 	struct mbuf **mp;
    378 	register struct nfsreq *rep;
    379 {
    380 	struct uio auio;
    381 	struct iovec aio;
    382 	register struct mbuf *m;
    383 	struct mbuf *m2, *mnew, **mbp;
    384 	caddr_t fcp, tcp;
    385 	u_long len;
    386 	struct mbuf **getnam;
    387 	int error, siz, mlen, soflags, rcvflg;
    388 
    389 	/*
    390 	 * Set up arguments for soreceive()
    391 	 */
    392 	*mp = (struct mbuf *)0;
    393 	*aname = (struct mbuf *)0;
    394 	if (rep)
    395 		soflags = rep->r_nmp->nm_soflags;
    396 	else
    397 		soflags = so->so_proto->pr_flags;
    398 
    399 	/*
    400 	 * For reliable protocols, lock against other senders/receivers
    401 	 * in case a reconnect is necessary.
    402 	 * For SOCK_STREAM, first get the Record Mark to find out how much
    403 	 * more there is to get.
    404 	 * We must lock the socket against other receivers
    405 	 * until we have an entire rpc request/reply.
    406 	 */
    407 	if (soflags & PR_CONNREQUIRED) {
    408 tryagain:
    409 		/*
    410 		 * Check for fatal errors and resending request.
    411 		 */
    412 		if (rep) {
    413 			/*
    414 			 * Ugh: If a reconnect attempt just happened, nm_so
    415 			 * would have changed. NULL indicates a failed
    416 			 * attempt that has essentially shut down this
    417 			 * mount point.
    418 			 */
    419 			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
    420 				(rep->r_flags & R_SOFTTERM))
    421 				return (EINTR);
    422 			while (rep->r_flags & R_MUSTRESEND) {
    423 				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
    424 				nfsstats.rpcretries++;
    425 				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
    426 					rep))
    427 					goto errout;
    428 			}
    429 		}
    430 		if ((soflags & PR_ATOMIC) == 0) {
    431 			aio.iov_base = (caddr_t) &len;
    432 			aio.iov_len = sizeof(u_long);
    433 			auio.uio_iov = &aio;
    434 			auio.uio_iovcnt = 1;
    435 			auio.uio_segflg = UIO_SYSSPACE;
    436 			auio.uio_rw = UIO_READ;
    437 			auio.uio_procp = (struct proc *)0;
    438 			auio.uio_offset = 0;
    439 			auio.uio_resid = sizeof(u_long);
    440 			do {
    441 			    rcvflg = MSG_WAITALL;
    442 			    error = soreceive(so, (struct mbuf **)0, &auio,
    443 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
    444 			    if (error == EWOULDBLOCK && rep) {
    445 				if (rep->r_flags & R_SOFTTERM)
    446 					return (EINTR);
    447 				if (rep->r_flags & R_MUSTRESEND)
    448 					goto tryagain;
    449 			    }
    450 			} while (error == EWOULDBLOCK);
    451 			if (!error && auio.uio_resid > 0) {
    452 			    if (rep)
    453 				log(LOG_INFO,
    454 				   "short receive (%d/%d) from nfs server %s\n",
    455 				   sizeof(u_long) - auio.uio_resid,
    456 				   sizeof(u_long),
    457 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    458 			    error = EPIPE;
    459 			}
    460 			if (error)
    461 				goto errout;
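        			/*
        			 * The record mark is one XDR word: the high-order bit
        			 * flags the last fragment and the low 31 bits give the
        			 * fragment length. A reply is assumed to arrive as a
        			 * single fragment, just as this client sends its requests.
        			 */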
    462 			len = ntohl(len) & ~0x80000000;
    463 			/*
    464 			 * This is SERIOUS! We are out of sync with the sender
    465 			 * and forcing a disconnect/reconnect is all I can do.
    466 			 */
    467 			if (len > NFS_MAXPACKET) {
    468 			    if (rep)
    469 				log(LOG_ERR, "%s (%d) from nfs server %s\n",
    470 				    "impossible packet length",
    471 				    len,
    472 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    473 			    error = EFBIG;
    474 			    goto errout;
    475 			}
    476 			auio.uio_resid = len;
    477 			do {
    478 			    rcvflg = MSG_WAITALL;
    479 			    error =  soreceive(so, (struct mbuf **)0,
    480 				&auio, mp, (struct mbuf **)0, &rcvflg);
    481 			} while (error == EWOULDBLOCK || error == EINTR ||
    482 				 error == ERESTART);
    483 			if (!error && auio.uio_resid > 0) {
    484 			    if (rep)
    485 				log(LOG_INFO,
    486 				   "short receive (%d/%d) from nfs server %s\n",
    487 				   len - auio.uio_resid, len,
    488 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    489 			    error = EPIPE;
    490 			}
    491 		} else {
    492 			auio.uio_resid = len = 1000000;	/* Anything Big */
    493 			do {
    494 			    rcvflg = 0;
    495 			    error =  soreceive(so, (struct mbuf **)0,
    496 				&auio, mp, (struct mbuf **)0, &rcvflg);
    497 			    if (error == EWOULDBLOCK && rep) {
    498 				if (rep->r_flags & R_SOFTTERM)
    499 					return (EINTR);
    500 				if (rep->r_flags & R_MUSTRESEND)
    501 					goto tryagain;
    502 			    }
    503 			} while (error == EWOULDBLOCK);
    504 			if (!error && *mp == NULL)
    505 				error = EPIPE;
    506 			len -= auio.uio_resid;
    507 		}
    508 errout:
    509 		if (error && rep && error != EINTR && error != ERESTART) {
    510 			m_freem(*mp);
    511 			*mp = (struct mbuf *)0;
    512 			if (error != EPIPE && rep)
    513 				log(LOG_INFO,
    514 				    "receive error %d from nfs server %s\n",
    515 				    error,
    516 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    517 			nfs_disconnect(rep->r_nmp);
    518 			error = nfs_reconnect(rep, rep->r_nmp);
    519 			if (!error)
    520 				goto tryagain;
    521 		}
    522 	} else {
    523 		if (so->so_state & SS_ISCONNECTED)
    524 			getnam = (struct mbuf **)0;
    525 		else
    526 			getnam = aname;
    527 		auio.uio_resid = len = 1000000;
    528 		do {
    529 			rcvflg = 0;
    530 			error =  soreceive(so, getnam, &auio, mp,
    531 				(struct mbuf **)0, &rcvflg);
    532 			if (error == EWOULDBLOCK && rep &&
    533 			    (rep->r_flags & R_SOFTTERM))
    534 				return (EINTR);
    535 		} while (error == EWOULDBLOCK);
    536 		len -= auio.uio_resid;
    537 	}
    538 	if (error) {
    539 		m_freem(*mp);
    540 		*mp = (struct mbuf *)0;
    541 	}
    542 	/*
    543 	 * Search for any mbufs that are not a multiple of 4 bytes long.
    544 	 * These could cause pointer alignment problems, so copy them to
    545 	 * well aligned mbufs.
    546 	 */
    547 	m = *mp;
    548 	mbp = mp;
    549 	while (m) {
    550 		/*
    551 		 * All this for something that may never happen.
    552 		 */
    553 		if (m->m_next && (m->m_len & 0x3)) {
    554 			printf("nfs_rcv odd length!\n");
    555 			mlen = 0;
    556 			while (m) {
    557 				fcp = mtod(m, caddr_t);
    558 				while (m->m_len > 0) {
    559 					if (mlen == 0) {
    560 						MGET(m2, M_WAIT, MT_DATA);
    561 						if (len >= MINCLSIZE)
    562 							MCLGET(m2, M_WAIT);
    563 						m2->m_len = 0;
    564 						mlen = M_TRAILINGSPACE(m2);
    565 						tcp = mtod(m2, caddr_t);
    566 						*mbp = m2;
    567 						mbp = &m2->m_next;
    568 					}
    569 					siz = MIN(mlen, m->m_len);
    570 					bcopy(fcp, tcp, siz);
    571 					m2->m_len += siz;
    572 					mlen -= siz;
    573 					len -= siz;
    574 					tcp += siz;
    575 					m->m_len -= siz;
    576 					fcp += siz;
    577 				}
    578 				MFREE(m, mnew);
    579 				m = mnew;
    580 			}
    581 			break;
    582 		}
    583 		len -= m->m_len;
    584 		mbp = &m->m_next;
    585 		m = m->m_next;
    586 	}
    587 	return (error);
    588 }
    589 
    590 /*
    591  * Implement receipt of reply on a socket.
    592  * We must search through the list of received datagrams matching them
    593  * with outstanding requests using the xid, until ours is found.
    594  */
    595 /* ARGSUSED */
    596 nfs_reply(nmp, myrep)
    597 	struct nfsmount *nmp;
    598 	struct nfsreq *myrep;
    599 {
    600 	register struct mbuf *m;
    601 	register struct nfsreq *rep;
    602 	register int error = 0;
    603 	u_long rxid;
    604 	struct mbuf *mp, *nam;
    605 	char *cp;
    606 	int cnt, xfer;
    607 
    608 	/*
    609 	 * Loop around until we get our own reply
    610 	 */
    611 	for (;;) {
    612 		/*
    613 		 * Lock against other receivers so that I don't get stuck in
    614 		 * sbwait() after someone else has received my reply for me.
    615 		 * Also necessary for connection based protocols to avoid
    616 		 * race conditions during a reconnect.
    617 		 */
    618 		nfs_solock(&nmp->nm_flag);
    619 		/* Already received, bye bye */
    620 		if (myrep->r_mrep != NULL) {
    621 			nfs_sounlock(&nmp->nm_flag);
    622 			return (0);
    623 		}
    624 		/*
    625 		 * Get the next Rpc reply off the socket
    626 		 */
    627 		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
    628 			nfs_sounlock(&nmp->nm_flag);
    629 
    630 			/*
    631 			 * Ignore routing errors on connectionless protocols??
    632 			 */
    633 			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
    634 				nmp->nm_so->so_error = 0;
    635 				continue;
    636 			}
    637 
    638 			/*
    639 			 * Otherwise cleanup and return a fatal error.
    640 			 */
    641 			if (myrep->r_flags & R_TIMING) {
    642 				myrep->r_flags &= ~R_TIMING;
    643 				nmp->nm_rtt = -1;
    644 			}
    645 			if (myrep->r_flags & R_SENT) {
    646 				myrep->r_flags &= ~R_SENT;
    647 				nmp->nm_sent--;
    648 			}
    649 			return (error);
    650 		}
    651 
    652 		/*
    653 		 * Get the xid and check that it is an rpc reply
    654 		 */
    655 		m = mp;
    656 		while (m && m->m_len == 0)
    657 			m = m->m_next;
    658 		if (m == NULL) {
    659 			nfsstats.rpcinvalid++;
    660 			m_freem(mp);
    661 			nfs_sounlock(&nmp->nm_flag);
    662 			continue;
    663 		}
    664 		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
    665 		/*
    666 		 * Loop through the request list to match up the reply
    667 		 * Iff no match, just drop the datagram
    668 		 */
    669 		m = mp;
    670 		rep = nfsreqh.r_next;
    671 		while (rep != &nfsreqh) {
    672 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
    673 				/* Found it.. */
    674 				rep->r_mrep = m;
    675 				/*
    676 				 * Update timing
    677 				 */
    678 				if (rep->r_flags & R_TIMING) {
    679 					nfs_updatetimer(rep->r_nmp);
    680 					rep->r_flags &= ~R_TIMING;
    681 					rep->r_nmp->nm_rtt = -1;
    682 				}
    683 				if (rep->r_flags & R_SENT) {
    684 					rep->r_flags &= ~R_SENT;
    685 					rep->r_nmp->nm_sent--;
    686 				}
    687 				break;
    688 			}
    689 			rep = rep->r_next;
    690 		}
    691 		nfs_sounlock(&nmp->nm_flag);
    692 		if (nam)
    693 			m_freem(nam);
    694 		/*
    695 		 * If not matched to a request, drop it.
    696 		 * If it's mine, get out.
    697 		 */
    698 		if (rep == &nfsreqh) {
    699 			nfsstats.rpcunexpected++;
    700 			m_freem(m);
    701 		} else if (rep == myrep)
    702 			return (0);
    703 	}
    704 }
    705 
    706 /*
    707  * nfs_request - goes something like this
    708  *	- fill in request struct
    709  *	- links it into list
    710  *	- calls nfs_send() for first transmit
    711  *	- calls nfs_receive() to get reply
    712  *	- break down rpc header and return with nfs reply pointed to
    713  *	  by mrep or error
    714  * nb: always frees up mreq mbuf list
    715  */
    716 nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
    717 	struct vnode *vp;
    718 	struct mbuf *mreq;
    719 	u_long xid;
    720 	int procnum;
    721 	struct proc *procp;
    722 	int tryhard;
    723 	struct mount *mp;
    724 	struct mbuf **mrp;
    725 	struct mbuf **mdp;
    726 	caddr_t *dposp;
    727 {
    728 	register struct mbuf *m, *mrep;
    729 	register struct nfsreq *rep;
    730 	register u_long *tl;
    731 	register int len;
    732 	struct nfsmount *nmp;
    733 	struct mbuf *md;
    734 	struct nfsreq *reph;
    735 	caddr_t dpos;
    736 	char *cp2;
    737 	int t1;
    738 	int s, compressed;
    739 	int error = 0;
    740 
    741 	nmp = VFSTONFS(mp);
    742 	m = mreq;
    743 	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
    744 	rep->r_xid = xid;
    745 	rep->r_nmp = nmp;
    746 	rep->r_vp = vp;
    747 	rep->r_procp = procp;
    748 	if ((nmp->nm_flag & NFSMNT_SOFT) ||
    749 	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
    750 		rep->r_retry = nmp->nm_retry;
    751 	else
    752 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
    753 	rep->r_flags = rep->r_rexmit = 0;
    754 	/*
    755 	 * Three cases:
    756 	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
    757 	 * - idempotent requests on SOCK_DGRAM use 0
     758 	 * - reliable transports use NFS_RELIABLETIMEO
    759 	 *   Timeouts are still done on reliable transports to ensure detection
    760 	 *   of excessive connection delay.
    761 	 */
    762 	if (nmp->nm_sotype != SOCK_DGRAM)
    763 		rep->r_timerinit = -NFS_RELIABLETIMEO;
    764 	else if (nonidempotent[procnum])
    765 		rep->r_timerinit = -NFS_MINIDEMTIMEO;
    766 	else
    767 		rep->r_timerinit = 0;
    768 	rep->r_timer = rep->r_timerinit;
    769 	rep->r_mrep = NULL;
    770 	len = 0;
    771 	while (m) {
    772 		len += m->m_len;
    773 		m = m->m_next;
    774 	}
    775 	mreq->m_pkthdr.len = len;
    776 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
    777 	compressed = 0;
    778 	m = mreq;
    779 	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
    780 		mreq = nfs_compress(mreq);
    781 		if (mreq != m) {
    782 			len = mreq->m_pkthdr.len;
    783 			compressed++;
    784 		}
    785 	}
    786 	/*
    787 	 * For non-atomic protocols, insert a Sun RPC Record Mark.
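        	 * (Datagram transports preserve message boundaries, so only stream
        	 *  transports need the length-carrying mark.)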
    788 	 */
    789 	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
    790 		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
    791 		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
    792 	}
    793 	rep->r_mreq = mreq;
    794 
    795 	/*
    796 	 * Do the client side RPC.
    797 	 */
    798 	nfsstats.rpcrequests++;
    799 	/*
    800 	 * Chain request into list of outstanding requests. Be sure
    801 	 * to put it LAST so timer finds oldest requests first.
    802 	 */
    803 	s = splnet();
    804 	reph = &nfsreqh;
    805 	reph->r_prev->r_next = rep;
    806 	rep->r_prev = reph->r_prev;
    807 	reph->r_prev = rep;
    808 	rep->r_next = reph;
    809 	/*
    810 	 * If backing off another request or avoiding congestion, don't
    811 	 * send this one now but let timer do it. If not timing a request,
    812 	 * do it now.
    813 	 */
    814 	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
    815 	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
    816 		nmp->nm_sent++;
    817 		rep->r_flags |= R_SENT;
    818 		if (nmp->nm_rtt == -1) {
    819 			nmp->nm_rtt = 0;
    820 			rep->r_flags |= R_TIMING;
    821 		}
    822 		splx(s);
    823 		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
    824 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    825 			nfs_solock(&nmp->nm_flag);
    826 		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
    827 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    828 			nfs_sounlock(&nmp->nm_flag);
    829 		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
    830 			nmp->nm_so->so_error = error = 0;
    831 	} else
    832 		splx(s);
    833 
    834 	/*
    835 	 * Wait for the reply from our send or the timer's.
    836 	 */
    837 	if (!error)
    838 		error = nfs_reply(nmp, rep);
    839 
    840 	/*
    841 	 * RPC done, unlink the request.
    842 	 */
    843 	s = splnet();
    844 	rep->r_prev->r_next = rep->r_next;
    845 	rep->r_next->r_prev = rep->r_prev;
    846 	splx(s);
    847 
    848 	/*
     849 	 * If there was a successful reply and a tprintf msg had already been
     850 	 * printed, tprintf a response.
    851 	 */
    852 	if (!error && (rep->r_flags & R_TPRINTFMSG))
    853 		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    854 		    "is alive again");
    855 	m_freem(rep->r_mreq);
    856 	mrep = rep->r_mrep;
    857 	FREE((caddr_t)rep, M_NFSREQ);
    858 	if (error)
    859 		return (error);
    860 
    861 	if (compressed)
    862 		mrep = nfs_uncompress(mrep);
    863 	md = mrep;
    864 	/*
    865 	 * break down the rpc header and check if ok
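        	 * (the five XDR words fetched below are the xid, the message type,
        	 *  the reply status (accepted/denied), and the verifier flavor and
        	 *  length)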
    866 	 */
    867 	dpos = mtod(md, caddr_t);
    868 	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
    869 	tl += 2;
    870 	if (*tl++ == rpc_msgdenied) {
    871 		if (*tl == rpc_mismatch)
    872 			error = EOPNOTSUPP;
    873 		else
    874 			error = EACCES;
    875 		m_freem(mrep);
    876 		return (error);
    877 	}
    878 	/*
    879 	 * skip over the auth_verf, someday we may want to cache auth_short's
    880 	 * for nfs_reqhead(), but for now just dump it
    881 	 */
    882 	if (*++tl != 0) {
    883 		len = nfsm_rndup(fxdr_unsigned(long, *tl));
    884 		nfsm_adv(len);
    885 	}
    886 	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    887 	/* 0 == ok */
    888 	if (*tl == 0) {
    889 		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    890 		if (*tl != 0) {
    891 			error = fxdr_unsigned(int, *tl);
    892 			m_freem(mrep);
    893 			return (error);
    894 		}
    895 		*mrp = mrep;
    896 		*mdp = md;
    897 		*dposp = dpos;
    898 		return (0);
    899 	}
    900 	m_freem(mrep);
    901 	return (EPROTONOSUPPORT);
    902 nfsmout:
    903 	return (error);
    904 }
    905 
    906 /*
    907  * Get a request for the server main loop
     908  * - receive a request via nfs_receive()
    909  * - verify it
    910  * - fill in the cred struct.
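         * - on RPC-level problems (version, program or procedure mismatch) return 0
         *   with *procnum set to NFSPROC_NOOP and *repstat set, so the caller can
         *   generate an error reply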
    911  */
    912 nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
    913 	msk, mtch, wascomp, repstat)				/* 08 Aug 92*/
    914 	struct socket *so;
    915 	u_long prog;
    916 	u_long vers;
    917 	int maxproc;
    918 	struct mbuf **nam;
    919 	struct mbuf **mrp;
    920 	struct mbuf **mdp;
    921 	caddr_t *dposp;
    922 	u_long *retxid;
    923 	u_long *procnum;
    924 	register struct ucred *cr;
    925 	struct mbuf *msk, *mtch;
    926 	int *wascomp, *repstat;					/* 08 Aug 92*/
    927 {
    928 	register int i;
    929 	register u_long *tl;
    930 	register long t1;
    931 	caddr_t dpos, cp2;
    932 	int error = 0;
    933 	struct mbuf *mrep, *md;
    934 	int len;
    935 
    936 	*repstat = 0;						/* 08 Aug 92*/
    937 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    938 		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    939 	} else {
    940 		mrep = (struct mbuf *)0;
    941 		do {
    942 			if (mrep) {
    943 				m_freem(*nam);
    944 				m_freem(mrep);
    945 			}
    946 			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    947 		} while (!error && nfs_badnam(*nam, msk, mtch));
    948 	}
    949 	if (error)
    950 		return (error);
    951 	md = mrep;
    952 	mrep = nfs_uncompress(mrep);
    953 	if (mrep != md) {
    954 		*wascomp = 1;
    955 		md = mrep;
    956 	} else
    957 		*wascomp = 0;
    958 	dpos = mtod(mrep, caddr_t);
    959 	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
    960 	*retxid = fxdr_unsigned(u_long, *tl++);
    961 	if (*tl++ != rpc_call || *tl++ != rpc_vers) {		/* 08 Aug 92*/
    962 		*mrp = mrep;
    963 		*procnum = NFSPROC_NOOP;
    964 		*repstat = ERPCMISMATCH;
    965 		return (0);
    966 	}
    967 	if (*tl++ != prog) {
    968 		*mrp = mrep;					/* 08 Aug 92*/
    969 		*procnum = NFSPROC_NOOP;
    970 		*repstat = EPROGUNAVAIL;
    971 		return (0);
    972 	}
    973 	if (*tl++ != vers) {
    974 		*mrp = mrep;					/* 08 Aug 92*/
    975 		*procnum = NFSPROC_NOOP;
    976 		*repstat = EPROGMISMATCH;
    977 		return (0);
    978 	}
    979 	*procnum = fxdr_unsigned(u_long, *tl++);
    980 	if (*procnum == NFSPROC_NULL) {
    981 		*mrp = mrep;
    982 		return (0);
    983 	}
    984 	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
    985 		*mrp = mrep;					/* 08 Aug 92*/
    986 		*procnum = NFSPROC_NOOP;
    987 		*repstat = EPROCUNAVAIL;
    988 		return (0);
    989 	}
    990 	len = fxdr_unsigned(int, *tl++);
    991 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
    992 		m_freem(mrep);
    993 		return (EBADRPC);
    994 	}
    995 	len = fxdr_unsigned(int, *++tl);
    996 	if (len < 0 || len > NFS_MAXNAMLEN) {
    997 		m_freem(mrep);
    998 		return (EBADRPC);
    999 	}
   1000 	nfsm_adv(nfsm_rndup(len));
   1001 	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
   1002 	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
   1003 	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
   1004 	len = fxdr_unsigned(int, *tl);
   1005 	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
   1006 		m_freem(mrep);
   1007 		return (EBADRPC);
   1008 	}
   1009 	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
   1010 	for (i = 1; i <= len; i++)
   1011 		if (i < NGROUPS)
   1012 			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
   1013 		else
   1014 			tl++;
   1015 	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
   1016 	/*
    1017 	 * Do we have any use for the verifier?
    1018 	 * According to the "Remote Procedure Call Protocol Spec." it
    1019 	 * should be AUTH_NULL, but some clients make it AUTH_UNIX.
    1020 	 * For now, just skip over it.
   1021 	 */
   1022 	len = fxdr_unsigned(int, *++tl);
   1023 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
   1024 		m_freem(mrep);
   1025 		return (EBADRPC);
   1026 	}
   1027 	if (len > 0)
   1028 		nfsm_adv(nfsm_rndup(len));
   1029 	*mrp = mrep;
   1030 	*mdp = md;
   1031 	*dposp = dpos;
   1032 	return (0);
   1033 nfsmout:
   1034 	return (error);
   1035 }
   1036 
   1037 /*
   1038  * Generate the rpc reply header
   1039  * siz arg. is used to decide if adding a cluster is worthwhile
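         * The basic reply built here is six XDR words: the xid, REPLY, and either
         * MSG_DENIED, RPC_MISMATCH and the supported rpc version range, or
         * MSG_ACCEPTED, an AUTH_NULL verifier (two zero words) and the accept status.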
   1040  */
   1041 nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
   1042 	int siz;
   1043 	u_long retxid;
   1044 	int err;
   1045 	struct mbuf **mrq;
   1046 	struct mbuf **mbp;
   1047 	caddr_t *bposp;
   1048 {
   1049 	register u_long *tl;
   1050 	register long t1;
   1051 	caddr_t bpos;
   1052 	struct mbuf *mreq, *mb, *mb2;
   1053 
   1054 	NFSMGETHDR(mreq);
   1055 	mb = mreq;
   1056 	if ((siz+RPC_REPLYSIZ) > MHLEN)
   1057 		MCLGET(mreq, M_WAIT);
   1058 	tl = mtod(mreq, u_long *);
   1059 	mreq->m_len = 6*NFSX_UNSIGNED;
   1060 	bpos = ((caddr_t)tl)+mreq->m_len;
   1061 	*tl++ = txdr_unsigned(retxid);
   1062 	*tl++ = rpc_reply;
   1063 	if (err == ERPCMISMATCH) {
   1064 		*tl++ = rpc_msgdenied;
   1065 		*tl++ = rpc_mismatch;
   1066 		*tl++ = txdr_unsigned(2);
   1067 		*tl = txdr_unsigned(2);
   1068 	} else {
   1069 		*tl++ = rpc_msgaccepted;
   1070 		*tl++ = 0;
   1071 		*tl++ = 0;
   1072 		switch (err) {
   1073 		case EPROGUNAVAIL:
   1074 			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
   1075 			break;
   1076 		case EPROGMISMATCH:
   1077 			*tl = txdr_unsigned(RPC_PROGMISMATCH);
   1078 			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
   1079 			*tl++ = txdr_unsigned(2);
   1080 			*tl = txdr_unsigned(2);	/* someday 3 */
   1081 			break;
   1082 		case EPROCUNAVAIL:
   1083 			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
   1084 			break;
   1085 		default:
   1086 			*tl = 0;
   1087 			if (err != VNOVAL) {
   1088 				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
   1089 				*tl = txdr_unsigned(err);
   1090 			}
   1091 			break;
   1092 		};
   1093 	}
   1094 	*mrq = mreq;
   1095 	*mbp = mb;
   1096 	*bposp = bpos;
   1097 	if (err != 0 && err != VNOVAL)
   1098 		nfsstats.srvrpc_errs++;
   1099 	return (0);
   1100 }
   1101 
   1102 /*
   1103  * Nfs timer routine
    1104  * Scan the nfsreq list and retransmit any requests that have timed out
   1105  * To avoid retransmission attempts on STREAM sockets (in the future) make
   1106  * sure to set the r_retry field to 0 (implies nm_retry == 0).
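         * The routine arms itself again every (hz / NFS_HZ) ticks via timeout().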
   1107  */
   1108 void
   1109 nfs_timer()
   1110 {
   1111 	register struct nfsreq *rep;
   1112 	register struct mbuf *m;
   1113 	register struct socket *so;
   1114 	register struct nfsmount *nmp;
   1115 	int s, error;
   1116 
   1117 	s = splnet();
   1118 	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
   1119 		nmp = rep->r_nmp;
   1120 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
   1121 		    (so = nmp->nm_so) == NULL)
   1122 			continue;
   1123 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
   1124 			rep->r_flags |= R_SOFTTERM;
   1125 			continue;
   1126 		}
   1127 		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
   1128 			nmp->nm_rtt++;
   1129 		/* If not timed out */
   1130 		if (++rep->r_timer < nmp->nm_rto)
   1131 			continue;
   1132 		/* Do backoff and save new timeout in mount */
   1133 		if (rep->r_flags & R_TIMING) {
   1134 			nfs_backofftimer(nmp);
   1135 			rep->r_flags &= ~R_TIMING;
   1136 			nmp->nm_rtt = -1;
   1137 		}
   1138 		if (rep->r_flags & R_SENT) {
   1139 			rep->r_flags &= ~R_SENT;
   1140 			nmp->nm_sent--;
   1141 		}
   1142 
   1143 		/*
   1144 		 * Check for too many retries on soft mount.
   1145 		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
   1146 		 */
   1147 		if (++rep->r_rexmit > NFS_MAXREXMIT)
   1148 			rep->r_rexmit = NFS_MAXREXMIT;
   1149 
   1150 		/*
   1151 		 * Check for server not responding
   1152 		 */
   1153 		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
   1154 		     rep->r_rexmit > NFS_FISHY) {
   1155 			nfs_msg(rep->r_procp,
   1156 			    nmp->nm_mountp->mnt_stat.f_mntfromname,
   1157 			    "not responding");
   1158 			rep->r_flags |= R_TPRINTFMSG;
   1159 		}
   1160 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
   1161 			nfsstats.rpctimeouts++;
   1162 			rep->r_flags |= R_SOFTTERM;
   1163 			continue;
   1164 		}
   1165 		if (nmp->nm_sotype != SOCK_DGRAM)
   1166 			continue;
   1167 
   1168 		/*
   1169 		 * If there is enough space and the window allows..
   1170 		 *	Resend it
   1171 		 */
   1172 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
   1173 		       nmp->nm_sent < nmp->nm_window &&
   1174 		       (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
   1175 			nfsstats.rpcretries++;
   1176 			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
   1177 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1178 			    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
   1179 			else
   1180 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1181 			    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
   1182 			if (error) {
   1183 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
   1184 					so->so_error = 0;
   1185 			} else {
   1186 				/*
   1187 				 * We need to time the request even though we
   1188 				 * are retransmitting.
   1189 				 */
   1190 				nmp->nm_rtt = 0;
   1191 				nmp->nm_sent++;
   1192 				rep->r_flags |= (R_SENT|R_TIMING);
   1193 				rep->r_timer = rep->r_timerinit;
   1194 			}
   1195 		}
   1196 	}
   1197 	splx(s);
   1198 	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
   1199 }
   1200 
   1201 /*
   1202  * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
   1203  * used here. The timer state is held in the nfsmount structure and
   1204  * a single request is used to clock the response. When successful
   1205  * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
   1206  * is done by nfs_backofftimer. We also log failure messages in these
   1207  * routines.
   1208  *
   1209  * Congestion variables are held in the nfshost structure which
   1210  * is referenced by nfsmounts and shared per-server. This separation
   1211  * makes it possible to do per-mount timing which allows varying disk
   1212  * access times to be dealt with, while preserving a network oriented
   1213  * congestion control scheme.
   1214  *
   1215  * The windowing implements the Jacobson/Karels slowstart algorithm
   1216  * with adjusted scaling factors. We start with one request, then send
   1217  * 4 more after each success until the ssthresh limit is reached, then
   1218  * we increment at a rate proportional to the window. On failure, we
   1219  * remember 3/4 the current window and clamp the send limit to 1. Note
   1220  * ICMP source quench is not reflected in so->so_error so we ignore that
   1221  * for now.
   1222  *
   1223  * NFS behaves much more like a transport protocol with these changes,
   1224  * shedding the teenage pedal-to-the-metal tendencies of "other"
   1225  * implementations.
   1226  *
   1227  * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
   1228  */
   1229 
   1230 /*
   1231  * The TCP algorithm was not forgiving enough. Because the NFS server
   1232  * responds only after performing lookups/diskio/etc, we have to be
   1233  * more prepared to accept a spiky variance. The TCP algorithm is:
   1234  * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
   1235  */
   1236 #define NFS_RTO(nmp)	(((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
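        /*
         * nm_srtt is kept scaled by 8 and nm_rttvar by 4 (as in the TCP code), so
         * NFS_RTO evaluates to the smoothed rtt plus four times the mean deviation,
         * in NFS_HZ ticks. For example, a smoothed rtt of 10 ticks (nm_srtt == 80)
         * with a mean deviation of 2 ticks (nm_rttvar == 8) gives an rto of 18 ticks.
         */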
   1237 
   1238 nfs_updatetimer(nmp)
   1239 	register struct nfsmount *nmp;
   1240 {
   1241 
   1242 	/* If retransmitted, clear and return */
   1243 	if (nmp->nm_rexmit || nmp->nm_currexmit) {
   1244 		nmp->nm_rexmit = nmp->nm_currexmit = 0;
   1245 		return;
   1246 	}
   1247 	/* If have a measurement, do smoothing */
   1248 	if (nmp->nm_srtt) {
   1249 		register short delta;
   1250 		delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
   1251 		if ((nmp->nm_srtt += delta) <= 0)
   1252 			nmp->nm_srtt = 1;
   1253 		if (delta < 0)
   1254 			delta = -delta;
   1255 		delta -= (nmp->nm_rttvar >> 2);
   1256 		if ((nmp->nm_rttvar += delta) <= 0)
   1257 			nmp->nm_rttvar = 1;
   1258 	/* Else initialize */
   1259 	} else {
   1260 		nmp->nm_rttvar = nmp->nm_rtt << 1;
   1261 		if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
   1262 		nmp->nm_srtt = nmp->nm_rttvar << 2;
   1263 	}
   1264 	/* Compute new Retransmission TimeOut and clip */
   1265 	nmp->nm_rto = NFS_RTO(nmp);
   1266 	if (nmp->nm_rto < NFS_MINTIMEO)
   1267 		nmp->nm_rto = NFS_MINTIMEO;
   1268 	else if (nmp->nm_rto > NFS_MAXTIMEO)
   1269 		nmp->nm_rto = NFS_MAXTIMEO;
   1270 
   1271 	/* Update window estimate */
   1272 	if (nmp->nm_window < nmp->nm_ssthresh)	/* quickly */
   1273 		nmp->nm_window += 4;
   1274 	else {						/* slowly */
   1275 		register long incr = ++nmp->nm_winext;
   1276 		incr = (incr * incr) / nmp->nm_window;
   1277 		if (incr > 0) {
   1278 			nmp->nm_winext = 0;
   1279 			++nmp->nm_window;
   1280 		}
   1281 	}
   1282 	if (nmp->nm_window > NFS_MAXWINDOW)
   1283 		nmp->nm_window = NFS_MAXWINDOW;
   1284 }
   1285 
   1286 nfs_backofftimer(nmp)
   1287 	register struct nfsmount *nmp;
   1288 {
   1289 	register unsigned long newrto;
   1290 
   1291 	/* Clip shift count */
   1292 	if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
   1293 		nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
   1294 	/* Back off RTO exponentially */
   1295 	newrto = NFS_RTO(nmp);
   1296 	newrto <<= (nmp->nm_rexmit - 1);
   1297 	if (newrto == 0 || newrto > NFS_MAXTIMEO)
   1298 		newrto = NFS_MAXTIMEO;
   1299 	nmp->nm_rto = newrto;
   1300 
   1301 	/* If too many retries, message, assume a bogus RTT and re-measure */
   1302 	if (nmp->nm_currexmit < nmp->nm_rexmit) {
   1303 		nmp->nm_currexmit = nmp->nm_rexmit;
   1304 		if (nmp->nm_currexmit >= nfsrexmtthresh) {
   1305 			if (nmp->nm_currexmit == nfsrexmtthresh) {
   1306 				nmp->nm_rttvar += (nmp->nm_srtt >> 2);
   1307 				nmp->nm_srtt = 0;
   1308 			}
   1309 		}
   1310 	}
   1311 	/* Close down window but remember this point (3/4 current) for later */
   1312 	nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
   1313 	nmp->nm_window = 1;
   1314 	nmp->nm_winext = 0;
   1315 }
   1316 
   1317 /*
   1318  * Test for a termination signal pending on procp.
   1319  * This is used for NFSMNT_INT mounts.
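         * A signal counts only if it is in NFSINT_SIGMASK and is neither blocked
         * nor ignored.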
   1320  */
   1321 nfs_sigintr(p)
   1322 	register struct proc *p;
   1323 {
   1324 	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
   1325 	    NFSINT_SIGMASK))
   1326 		return (1);
   1327 	else
   1328 		return (0);
   1329 }
   1330 
   1331 nfs_msg(p, server, msg)
   1332 	struct proc *p;
   1333 	char *server, *msg;
   1334 {
   1335 	tpr_t tpr;
   1336 
   1337 	if (p)
   1338 		tpr = tprintf_open(p);
   1339 	else
   1340 		tpr = NULL;
   1341 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1342 	tprintf_close(tpr);
   1343 }
   1344 
   1345 /*
   1346  * Lock a socket against others.
   1347  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1348  * and also to avoid race conditions between the processes with nfs requests
   1349  * in progress when a reconnect is necessary.
   1350  */
   1351 nfs_solock(flagp)
   1352 	register int *flagp;
   1353 {
   1354 
   1355 	while (*flagp & NFSMNT_SCKLOCK) {
   1356 		*flagp |= NFSMNT_WANTSCK;
   1357 		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
   1358 	}
   1359 	*flagp |= NFSMNT_SCKLOCK;
   1360 }
   1361 
   1362 /*
   1363  * Unlock the stream socket for others.
   1364  */
   1365 nfs_sounlock(flagp)
   1366 	register int *flagp;
   1367 {
   1368 
   1369 	if ((*flagp & NFSMNT_SCKLOCK) == 0)
   1370 		panic("nfs sounlock");
   1371 	*flagp &= ~NFSMNT_SCKLOCK;
   1372 	if (*flagp & NFSMNT_WANTSCK) {
   1373 		*flagp &= ~NFSMNT_WANTSCK;
   1374 		wakeup((caddr_t)flagp);
   1375 	}
   1376 }
   1377 
   1378 /*
   1379  * This function compares two net addresses by family and returns TRUE
   1380  * if they are the same.
   1381  * If there is any doubt, return FALSE.
   1382  */
   1383 nfs_netaddr_match(nam1, nam2)
   1384 	struct mbuf *nam1, *nam2;
   1385 {
   1386 	register struct sockaddr *saddr1, *saddr2;
   1387 
   1388 	saddr1 = mtod(nam1, struct sockaddr *);
   1389 	saddr2 = mtod(nam2, struct sockaddr *);
   1390 	if (saddr1->sa_family != saddr2->sa_family)
   1391 		return (0);
   1392 
   1393 	/*
   1394 	 * Must do each address family separately since unused fields
   1395 	 * are undefined values and not always zeroed.
   1396 	 */
   1397 	switch (saddr1->sa_family) {
   1398 	case AF_INET:
   1399 		if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
   1400 		    ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
   1401 			return (1);
   1402 		break;
   1403 	default:
   1404 		break;
   1405 	};
   1406 	return (0);
   1407 }
   1408 
   1409 /*
   1410  * Check the hostname fields for nfsd's mask and match fields.
   1411  * By address family:
   1412  * - Bitwise AND the mask with the host address field
   1413  * - Compare for == with match
   1414  * return TRUE if not equal
   1415  */
   1416 nfs_badnam(nam, msk, mtch)
   1417 	register struct mbuf *nam, *msk, *mtch;
   1418 {
   1419 	switch (mtod(nam, struct sockaddr *)->sa_family) {
   1420 	case AF_INET:
   1421 		return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
   1422 			 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
   1423 			 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
   1424 	default:
   1425 		printf("nfs_badmatch, unknown sa_family\n");
   1426 		return (0);
   1427 	};
   1428 }
   1429