Home | History | Annotate | Line # | Download | only in nfs
nfs_socket.c revision 1.9
      1  1.1      cgd /*
      2  1.1      cgd  * Copyright (c) 1989, 1991 The Regents of the University of California.
      3  1.1      cgd  * All rights reserved.
      4  1.1      cgd  *
      5  1.1      cgd  * This code is derived from software contributed to Berkeley by
      6  1.1      cgd  * Rick Macklem at The University of Guelph.
      7  1.1      cgd  *
      8  1.1      cgd  * Redistribution and use in source and binary forms, with or without
      9  1.1      cgd  * modification, are permitted provided that the following conditions
     10  1.1      cgd  * are met:
     11  1.1      cgd  * 1. Redistributions of source code must retain the above copyright
     12  1.1      cgd  *    notice, this list of conditions and the following disclaimer.
     13  1.1      cgd  * 2. Redistributions in binary form must reproduce the above copyright
     14  1.1      cgd  *    notice, this list of conditions and the following disclaimer in the
     15  1.1      cgd  *    documentation and/or other materials provided with the distribution.
     16  1.1      cgd  * 3. All advertising materials mentioning features or use of this software
     17  1.1      cgd  *    must display the following acknowledgement:
     18  1.1      cgd  *	This product includes software developed by the University of
     19  1.1      cgd  *	California, Berkeley and its contributors.
     20  1.1      cgd  * 4. Neither the name of the University nor the names of its contributors
     21  1.1      cgd  *    may be used to endorse or promote products derived from this software
     22  1.1      cgd  *    without specific prior written permission.
     23  1.1      cgd  *
     24  1.1      cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  1.1      cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  1.1      cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  1.1      cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  1.1      cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  1.1      cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  1.1      cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  1.1      cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  1.1      cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  1.1      cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  1.1      cgd  * SUCH DAMAGE.
     35  1.1      cgd  *
     36  1.4      cgd  *	from: @(#)nfs_socket.c	7.23 (Berkeley) 4/20/91
     37  1.9  mycroft  *	$Id: nfs_socket.c,v 1.9 1993/12/18 00:45:14 mycroft Exp $
     38  1.1      cgd  */
     39  1.1      cgd 
     40  1.1      cgd /*
     41  1.1      cgd  * Socket operations for use by nfs
     42  1.1      cgd  */
     43  1.1      cgd 
     44  1.9  mycroft #include <sys/param.h>
     45  1.9  mycroft #include <sys/systm.h>
     46  1.9  mycroft #include <sys/proc.h>
     47  1.9  mycroft #include <sys/mount.h>
     48  1.9  mycroft #include <sys/kernel.h>
     49  1.9  mycroft #include <sys/malloc.h>
     50  1.9  mycroft #include <sys/mbuf.h>
     51  1.9  mycroft #include <sys/namei.h>
     52  1.9  mycroft #include <sys/vnode.h>
     53  1.9  mycroft #include <sys/domain.h>
     54  1.9  mycroft #include <sys/protosw.h>
     55  1.9  mycroft #include <sys/socket.h>
     56  1.9  mycroft #include <sys/socketvar.h>
     57  1.9  mycroft #include <sys/syslog.h>
     58  1.9  mycroft #include <sys/tprintf.h>
     59  1.1      cgd 
     60  1.9  mycroft #include <netinet/in.h>
     61  1.9  mycroft #include <netinet/tcp.h>
     62  1.9  mycroft 
     63  1.9  mycroft #include <nfs/rpcv2.h>
     64  1.9  mycroft #include <nfs/nfsv2.h>
     65  1.9  mycroft #include <nfs/nfs.h>
     66  1.9  mycroft #include <nfs/xdr_subs.h>
     67  1.9  mycroft #include <nfs/nfsm_subs.h>
     68  1.9  mycroft #include <nfs/nfsmount.h>
     69  1.1      cgd 
     70  1.1      cgd #define	TRUE	1
     71  1.1      cgd #define	FALSE	0
     72  1.1      cgd 
     73  1.1      cgd /*
     74  1.1      cgd  * External data, mostly RPC constants in XDR form
     75  1.1      cgd  */
     76  1.1      cgd extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
     77  1.1      cgd 	rpc_msgaccepted, rpc_call;
     78  1.1      cgd extern u_long nfs_prog, nfs_vers;
     79  1.1      cgd /* Maybe these should be bits in a u_long ?? */
     80  1.3    glass /*
     81  1.3    glass  * Static array that defines which nfs rpc's are nonidempotent
     82  1.3    glass  */
     83  1.3    glass int nonidempotent[NFS_NPROCS] = {
     84  1.3    glass 	FALSE,
     85  1.3    glass 	FALSE,
     86  1.3    glass 	TRUE,
     87  1.3    glass 	FALSE,
     88  1.3    glass 	FALSE,
     89  1.3    glass 	FALSE,
     90  1.3    glass 	FALSE,
     91  1.3    glass 	FALSE,
     92  1.3    glass 	TRUE,
     93  1.3    glass 	TRUE,
     94  1.3    glass 	TRUE,
     95  1.3    glass 	TRUE,
     96  1.3    glass 	TRUE,
     97  1.3    glass 	TRUE,
     98  1.3    glass 	TRUE,
     99  1.3    glass 	TRUE,
    100  1.3    glass 	FALSE,
    101  1.3    glass 	FALSE,
    102  1.3    glass };
    103  1.1      cgd static int compressrequest[NFS_NPROCS] = {
    104  1.1      cgd 	FALSE,
    105  1.1      cgd 	TRUE,
    106  1.1      cgd 	TRUE,
    107  1.1      cgd 	FALSE,
    108  1.1      cgd 	TRUE,
    109  1.1      cgd 	TRUE,
    110  1.1      cgd 	TRUE,
    111  1.1      cgd 	FALSE,
    112  1.1      cgd 	FALSE,
    113  1.1      cgd 	TRUE,
    114  1.1      cgd 	TRUE,
    115  1.1      cgd 	TRUE,
    116  1.1      cgd 	TRUE,
    117  1.1      cgd 	TRUE,
    118  1.1      cgd 	TRUE,
    119  1.1      cgd 	TRUE,
    120  1.1      cgd 	TRUE,
    121  1.1      cgd 	TRUE,
    122  1.1      cgd };
    123  1.1      cgd int	nfs_sbwait();
    124  1.1      cgd void	nfs_disconnect();
    125  1.1      cgd struct mbuf *nfs_compress(), *nfs_uncompress();
    126  1.1      cgd 
    127  1.1      cgd 
    128  1.1      cgd struct nfsreq nfsreqh;
    129  1.1      cgd int nfsrexmtthresh = NFS_FISHY;
    130  1.1      cgd int nfs_tcpnodelay = 1;
    131  1.1      cgd 
    132  1.1      cgd /*
    133  1.1      cgd  * Initialize sockets and congestion for a new NFS connection.
    134  1.1      cgd  * We do not free the sockaddr if error.
    135  1.1      cgd  */
    136  1.1      cgd nfs_connect(nmp)
    137  1.1      cgd 	register struct nfsmount *nmp;
    138  1.1      cgd {
    139  1.1      cgd 	register struct socket *so;
    140  1.2      cgd 	struct sockaddr *saddr;					/* 08 Sep 92*/
    141  1.1      cgd 	int s, error, bufsize;
    142  1.1      cgd 	struct mbuf *m;
    143  1.2      cgd 	struct sockaddr_in *sin;				/* 08 Sep 92*/
    144  1.2      cgd 	u_short tport;						/* 08 Sep 92*/
    145  1.1      cgd 
    146  1.1      cgd 	nmp->nm_so = (struct socket *)0;
    147  1.2      cgd 	saddr = mtod(nmp->nm_nam, struct sockaddr *);		/* 08 Sep 92*/
    148  1.2      cgd 	if (error = socreate(saddr->sa_family,			/* 08 Sep 92*/
    149  1.1      cgd 		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
    150  1.1      cgd 		goto bad;
    151  1.1      cgd 	so = nmp->nm_so;
    152  1.1      cgd 	nmp->nm_soflags = so->so_proto->pr_flags;
    153  1.1      cgd 
    154  1.2      cgd 	/*
    155  1.2      cgd 	 * 08 Sep 92
    156  1.2      cgd 	 *
    157  1.2      cgd 	 * Some servers require that the client port be a reserved port number.
    158  1.2      cgd 	 */
    159  1.2      cgd 	if (saddr->sa_family == AF_INET) {
    160  1.2      cgd 		MGET(m, M_WAIT, MT_SONAME);
    161  1.2      cgd 		sin = mtod(m, struct sockaddr_in *);
    162  1.2      cgd 		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
    163  1.2      cgd 		sin->sin_family = AF_INET;
    164  1.2      cgd 		sin->sin_addr.s_addr = INADDR_ANY;
    165  1.2      cgd 		tport = IPPORT_RESERVED - 1;
    166  1.2      cgd 		sin->sin_port = htons(tport);
    167  1.2      cgd 		while (sobind(so, m) == EADDRINUSE &&
    168  1.2      cgd 		       --tport > IPPORT_RESERVED / 2)
    169  1.2      cgd 			sin->sin_port = htons(tport);
    170  1.2      cgd 		m_freem(m);
    171  1.2      cgd 	}
    172  1.2      cgd 
    173  1.1      cgd 	if (nmp->nm_sotype == SOCK_DGRAM)
    174  1.1      cgd 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
    175  1.1      cgd 		    NFS_MAXPACKET);
    176  1.1      cgd 	else
    177  1.1      cgd 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
    178  1.1      cgd 		    NFS_MAXPACKET + sizeof(u_long));
    179  1.1      cgd 	if (error = soreserve(so, bufsize, bufsize))
    180  1.1      cgd 		goto bad;
    181  1.1      cgd 
    182  1.1      cgd 	/*
    183  1.1      cgd 	 * Protocols that do not require connections may be optionally left
    184  1.1      cgd 	 * unconnected for servers that reply from a port other than NFS_PORT.
    185  1.1      cgd 	 */
    186  1.1      cgd 	if (nmp->nm_flag & NFSMNT_NOCONN) {
    187  1.1      cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
    188  1.1      cgd 			error = ENOTCONN;
    189  1.1      cgd 			goto bad;
    190  1.1      cgd 		}
    191  1.1      cgd 	} else {
    192  1.1      cgd 		if (error = soconnect(so, nmp->nm_nam))
    193  1.1      cgd 			goto bad;
    194  1.1      cgd 
    195  1.1      cgd 		/*
    196  1.1      cgd 		 * Wait for the connection to complete. Cribbed from the
    197  1.1      cgd 		 * connect system call but with the wait at negative prio.
    198  1.1      cgd 		 */
    199  1.1      cgd 		s = splnet();
    200  1.1      cgd 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
    201  1.1      cgd 			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
    202  1.1      cgd 		splx(s);
    203  1.1      cgd 		if (so->so_error) {
    204  1.1      cgd 			error = so->so_error;
    205  1.1      cgd 			goto bad;
    206  1.1      cgd 		}
    207  1.1      cgd 	}
    208  1.1      cgd 	if (nmp->nm_sotype == SOCK_DGRAM) {
    209  1.1      cgd 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    210  1.1      cgd 			so->so_rcv.sb_timeo = (5 * hz);
    211  1.1      cgd 			so->so_snd.sb_timeo = (5 * hz);
    212  1.1      cgd 		} else {
    213  1.1      cgd 			so->so_rcv.sb_timeo = 0;
    214  1.1      cgd 			so->so_snd.sb_timeo = 0;
    215  1.1      cgd 		}
    216  1.1      cgd 		nmp->nm_rto = NFS_TIMEO;
    217  1.1      cgd 	} else {
    218  1.1      cgd 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    219  1.1      cgd 			so->so_rcv.sb_timeo = (5 * hz);
    220  1.1      cgd 			so->so_snd.sb_timeo = (5 * hz);
    221  1.1      cgd 		} else {
    222  1.1      cgd 			so->so_rcv.sb_timeo = 0;
    223  1.1      cgd 			so->so_snd.sb_timeo = 0;
    224  1.1      cgd 		}
    225  1.1      cgd 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    226  1.1      cgd 			MGET(m, M_WAIT, MT_SOOPTS);
    227  1.1      cgd 			*mtod(m, int *) = 1;
    228  1.1      cgd 			m->m_len = sizeof(int);
    229  1.1      cgd 			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
    230  1.1      cgd 		}
    231  1.1      cgd 		if (so->so_proto->pr_domain->dom_family == AF_INET &&
    232  1.1      cgd 		    so->so_proto->pr_protocol == IPPROTO_TCP &&
    233  1.1      cgd 		    nfs_tcpnodelay) {
    234  1.1      cgd 			MGET(m, M_WAIT, MT_SOOPTS);
    235  1.1      cgd 			*mtod(m, int *) = 1;
    236  1.1      cgd 			m->m_len = sizeof(int);
    237  1.1      cgd 			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
    238  1.1      cgd 		}
    239  1.1      cgd 		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
    240  1.1      cgd 	}
    241  1.1      cgd 	so->so_rcv.sb_flags |= SB_NOINTR;
    242  1.1      cgd 	so->so_snd.sb_flags |= SB_NOINTR;
    243  1.1      cgd 
    244  1.1      cgd 	/* Initialize other non-zero congestion variables */
    245  1.1      cgd 	nmp->nm_window = 2;			/* Initial send window */
    246  1.1      cgd 	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
    247  1.1      cgd 	nmp->nm_rttvar = nmp->nm_rto << 1;
    248  1.1      cgd 	nmp->nm_sent = 0;
    249  1.1      cgd 	nmp->nm_currexmit = 0;
    250  1.1      cgd 	return (0);
    251  1.1      cgd 
    252  1.1      cgd bad:
    253  1.1      cgd 	nfs_disconnect(nmp);
    254  1.1      cgd 	return (error);
    255  1.1      cgd }
    256  1.1      cgd 
    257  1.1      cgd /*
    258  1.1      cgd  * Reconnect routine:
    259  1.1      cgd  * Called when a connection is broken on a reliable protocol.
    260  1.1      cgd  * - clean up the old socket
    261  1.1      cgd  * - nfs_connect() again
    262  1.1      cgd  * - set R_MUSTRESEND for all outstanding requests on mount point
    263  1.1      cgd  * If this fails the mount point is DEAD!
    264  1.1      cgd  * nb: Must be called with the nfs_solock() set on the mount point.
    265  1.1      cgd  */
    266  1.1      cgd nfs_reconnect(rep, nmp)
    267  1.1      cgd 	register struct nfsreq *rep;
    268  1.1      cgd 	register struct nfsmount *nmp;
    269  1.1      cgd {
    270  1.1      cgd 	register struct nfsreq *rp;
    271  1.1      cgd 	int error;
    272  1.1      cgd 
    273  1.1      cgd 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    274  1.1      cgd 	    "trying reconnect");
    275  1.1      cgd 	while (error = nfs_connect(nmp)) {
    276  1.1      cgd #ifdef lint
    277  1.1      cgd 		error = error;
    278  1.1      cgd #endif /* lint */
    279  1.1      cgd 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
    280  1.1      cgd 			return (EINTR);
    281  1.1      cgd 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
    282  1.1      cgd 	}
    283  1.1      cgd 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    284  1.1      cgd 	    "reconnected");
    285  1.1      cgd 
    286  1.1      cgd 	/*
    287  1.1      cgd 	 * Loop through outstanding request list and fix up all requests
    288  1.1      cgd 	 * on old socket.
    289  1.1      cgd 	 */
    290  1.1      cgd 	rp = nfsreqh.r_next;
    291  1.1      cgd 	while (rp != &nfsreqh) {
    292  1.1      cgd 		if (rp->r_nmp == nmp)
    293  1.1      cgd 			rp->r_flags |= R_MUSTRESEND;
    294  1.1      cgd 		rp = rp->r_next;
    295  1.1      cgd 	}
    296  1.1      cgd 	return (0);
    297  1.1      cgd }
    298  1.1      cgd 
    299  1.1      cgd /*
    300  1.1      cgd  * NFS disconnect. Clean up and unlink.
    301  1.1      cgd  */
    302  1.1      cgd void
    303  1.1      cgd nfs_disconnect(nmp)
    304  1.1      cgd 	register struct nfsmount *nmp;
    305  1.1      cgd {
    306  1.1      cgd 	register struct socket *so;
    307  1.1      cgd 
    308  1.1      cgd 	if (nmp->nm_so) {
    309  1.1      cgd 		so = nmp->nm_so;
    310  1.1      cgd 		nmp->nm_so = (struct socket *)0;
    311  1.1      cgd 		soshutdown(so, 2);
    312  1.1      cgd 		soclose(so);
    313  1.1      cgd 	}
    314  1.1      cgd }
    315  1.1      cgd 
    316  1.1      cgd /*
    317  1.1      cgd  * This is the nfs send routine. For connection based socket types, it
    318  1.1      cgd  * must be called with an nfs_solock() on the socket.
    319  1.1      cgd  * "rep == NULL" indicates that it has been called from a server.
    320  1.1      cgd  */
    321  1.1      cgd nfs_send(so, nam, top, rep)
    322  1.1      cgd 	register struct socket *so;
    323  1.1      cgd 	struct mbuf *nam;
    324  1.1      cgd 	register struct mbuf *top;
    325  1.1      cgd 	struct nfsreq *rep;
    326  1.1      cgd {
    327  1.1      cgd 	struct mbuf *sendnam;
    328  1.1      cgd 	int error, soflags;
    329  1.1      cgd 
    330  1.1      cgd 	if (rep) {
    331  1.1      cgd 		if (rep->r_flags & R_SOFTTERM) {
    332  1.1      cgd 			m_freem(top);
    333  1.1      cgd 			return (EINTR);
    334  1.1      cgd 		}
    335  1.1      cgd 		if (rep->r_nmp->nm_so == NULL &&
    336  1.1      cgd 		    (error = nfs_reconnect(rep, rep->r_nmp)))
    337  1.1      cgd 			return (error);
    338  1.1      cgd 		rep->r_flags &= ~R_MUSTRESEND;
    339  1.1      cgd 		so = rep->r_nmp->nm_so;
    340  1.1      cgd 		soflags = rep->r_nmp->nm_soflags;
    341  1.1      cgd 	} else
    342  1.1      cgd 		soflags = so->so_proto->pr_flags;
    343  1.1      cgd 	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
    344  1.1      cgd 		sendnam = (struct mbuf *)0;
    345  1.1      cgd 	else
    346  1.1      cgd 		sendnam = nam;
    347  1.1      cgd 
    348  1.1      cgd 	error = sosend(so, sendnam, (struct uio *)0, top,
    349  1.1      cgd 		(struct mbuf *)0, 0);
    350  1.1      cgd 	if (error == EWOULDBLOCK && rep) {
    351  1.1      cgd 		if (rep->r_flags & R_SOFTTERM)
    352  1.1      cgd 			error = EINTR;
    353  1.1      cgd 		else {
    354  1.1      cgd 			rep->r_flags |= R_MUSTRESEND;
    355  1.1      cgd 			error = 0;
    356  1.1      cgd 		}
    357  1.1      cgd 	}
    358  1.1      cgd 	/*
    359  1.1      cgd 	 * Ignore socket errors??
    360  1.1      cgd 	 */
    361  1.1      cgd 	if (error && error != EINTR && error != ERESTART)
    362  1.1      cgd 		error = 0;
    363  1.1      cgd 	return (error);
    364  1.1      cgd }
    365  1.1      cgd 
    366  1.1      cgd /*
    367  1.1      cgd  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    368  1.1      cgd  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    369  1.1      cgd  * Mark and consolidate the data into a new mbuf list.
    370  1.1      cgd  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    371  1.1      cgd  *     small mbufs.
    372  1.1      cgd  * For SOCK_STREAM we must be very careful to read an entire record once
    373  1.1      cgd  * we have read any of it, even if the system call has been interrupted.
    374  1.1      cgd  */
    375  1.1      cgd nfs_receive(so, aname, mp, rep)
    376  1.1      cgd 	register struct socket *so;
    377  1.1      cgd 	struct mbuf **aname;
    378  1.1      cgd 	struct mbuf **mp;
    379  1.1      cgd 	register struct nfsreq *rep;
    380  1.1      cgd {
    381  1.1      cgd 	struct uio auio;
    382  1.1      cgd 	struct iovec aio;
    383  1.1      cgd 	register struct mbuf *m;
    384  1.1      cgd 	struct mbuf *m2, *mnew, **mbp;
    385  1.1      cgd 	caddr_t fcp, tcp;
    386  1.1      cgd 	u_long len;
    387  1.1      cgd 	struct mbuf **getnam;
    388  1.1      cgd 	int error, siz, mlen, soflags, rcvflg;
    389  1.1      cgd 
    390  1.1      cgd 	/*
    391  1.1      cgd 	 * Set up arguments for soreceive()
    392  1.1      cgd 	 */
    393  1.1      cgd 	*mp = (struct mbuf *)0;
    394  1.1      cgd 	*aname = (struct mbuf *)0;
    395  1.1      cgd 	if (rep)
    396  1.1      cgd 		soflags = rep->r_nmp->nm_soflags;
    397  1.1      cgd 	else
    398  1.1      cgd 		soflags = so->so_proto->pr_flags;
    399  1.1      cgd 
    400  1.1      cgd 	/*
    401  1.1      cgd 	 * For reliable protocols, lock against other senders/receivers
    402  1.1      cgd 	 * in case a reconnect is necessary.
    403  1.1      cgd 	 * For SOCK_STREAM, first get the Record Mark to find out how much
    404  1.1      cgd 	 * more there is to get.
    405  1.1      cgd 	 * We must lock the socket against other receivers
    406  1.1      cgd 	 * until we have an entire rpc request/reply.
    407  1.1      cgd 	 */
    408  1.1      cgd 	if (soflags & PR_CONNREQUIRED) {
    409  1.1      cgd tryagain:
    410  1.1      cgd 		/*
    411  1.1      cgd 		 * Check for fatal errors and resending request.
    412  1.1      cgd 		 */
    413  1.1      cgd 		if (rep) {
    414  1.1      cgd 			/*
    415  1.1      cgd 			 * Ugh: If a reconnect attempt just happened, nm_so
    416  1.1      cgd 			 * would have changed. NULL indicates a failed
    417  1.1      cgd 			 * attempt that has essentially shut down this
    418  1.1      cgd 			 * mount point.
    419  1.1      cgd 			 */
    420  1.1      cgd 			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
    421  1.1      cgd 				(rep->r_flags & R_SOFTTERM))
    422  1.1      cgd 				return (EINTR);
    423  1.1      cgd 			while (rep->r_flags & R_MUSTRESEND) {
    424  1.1      cgd 				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
    425  1.1      cgd 				nfsstats.rpcretries++;
    426  1.1      cgd 				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
    427  1.1      cgd 					rep))
    428  1.1      cgd 					goto errout;
    429  1.1      cgd 			}
    430  1.1      cgd 		}
    431  1.1      cgd 		if ((soflags & PR_ATOMIC) == 0) {
    432  1.1      cgd 			aio.iov_base = (caddr_t) &len;
    433  1.1      cgd 			aio.iov_len = sizeof(u_long);
    434  1.1      cgd 			auio.uio_iov = &aio;
    435  1.1      cgd 			auio.uio_iovcnt = 1;
    436  1.1      cgd 			auio.uio_segflg = UIO_SYSSPACE;
    437  1.1      cgd 			auio.uio_rw = UIO_READ;
    438  1.1      cgd 			auio.uio_procp = (struct proc *)0;
    439  1.1      cgd 			auio.uio_offset = 0;
    440  1.1      cgd 			auio.uio_resid = sizeof(u_long);
    441  1.1      cgd 			do {
    442  1.1      cgd 			    rcvflg = MSG_WAITALL;
    443  1.1      cgd 			    error = soreceive(so, (struct mbuf **)0, &auio,
    444  1.1      cgd 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
    445  1.1      cgd 			    if (error == EWOULDBLOCK && rep) {
    446  1.1      cgd 				if (rep->r_flags & R_SOFTTERM)
    447  1.1      cgd 					return (EINTR);
    448  1.1      cgd 				if (rep->r_flags & R_MUSTRESEND)
    449  1.1      cgd 					goto tryagain;
    450  1.1      cgd 			    }
    451  1.1      cgd 			} while (error == EWOULDBLOCK);
    452  1.1      cgd 			if (!error && auio.uio_resid > 0) {
    453  1.1      cgd 			    if (rep)
    454  1.1      cgd 				log(LOG_INFO,
    455  1.1      cgd 				   "short receive (%d/%d) from nfs server %s\n",
    456  1.1      cgd 				   sizeof(u_long) - auio.uio_resid,
    457  1.1      cgd 				   sizeof(u_long),
    458  1.1      cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    459  1.1      cgd 			    error = EPIPE;
    460  1.1      cgd 			}
    461  1.1      cgd 			if (error)
    462  1.1      cgd 				goto errout;
    463  1.1      cgd 			len = ntohl(len) & ~0x80000000;
    464  1.1      cgd 			/*
    465  1.1      cgd 			 * This is SERIOUS! We are out of sync with the sender
    466  1.1      cgd 			 * and forcing a disconnect/reconnect is all I can do.
    467  1.1      cgd 			 */
    468  1.1      cgd 			if (len > NFS_MAXPACKET) {
    469  1.1      cgd 			    if (rep)
    470  1.1      cgd 				log(LOG_ERR, "%s (%d) from nfs server %s\n",
    471  1.1      cgd 				    "impossible packet length",
    472  1.1      cgd 				    len,
    473  1.1      cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    474  1.1      cgd 			    error = EFBIG;
    475  1.1      cgd 			    goto errout;
    476  1.1      cgd 			}
    477  1.1      cgd 			auio.uio_resid = len;
    478  1.1      cgd 			do {
    479  1.1      cgd 			    rcvflg = MSG_WAITALL;
    480  1.1      cgd 			    error =  soreceive(so, (struct mbuf **)0,
    481  1.1      cgd 				&auio, mp, (struct mbuf **)0, &rcvflg);
    482  1.1      cgd 			} while (error == EWOULDBLOCK || error == EINTR ||
    483  1.1      cgd 				 error == ERESTART);
    484  1.1      cgd 			if (!error && auio.uio_resid > 0) {
    485  1.1      cgd 			    if (rep)
    486  1.1      cgd 				log(LOG_INFO,
    487  1.1      cgd 				   "short receive (%d/%d) from nfs server %s\n",
    488  1.1      cgd 				   len - auio.uio_resid, len,
    489  1.1      cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    490  1.1      cgd 			    error = EPIPE;
    491  1.1      cgd 			}
    492  1.1      cgd 		} else {
    493  1.1      cgd 			auio.uio_resid = len = 1000000;	/* Anything Big */
    494  1.1      cgd 			do {
    495  1.1      cgd 			    rcvflg = 0;
    496  1.1      cgd 			    error =  soreceive(so, (struct mbuf **)0,
    497  1.1      cgd 				&auio, mp, (struct mbuf **)0, &rcvflg);
    498  1.1      cgd 			    if (error == EWOULDBLOCK && rep) {
    499  1.1      cgd 				if (rep->r_flags & R_SOFTTERM)
    500  1.1      cgd 					return (EINTR);
    501  1.1      cgd 				if (rep->r_flags & R_MUSTRESEND)
    502  1.1      cgd 					goto tryagain;
    503  1.1      cgd 			    }
    504  1.1      cgd 			} while (error == EWOULDBLOCK);
    505  1.1      cgd 			if (!error && *mp == NULL)
    506  1.1      cgd 				error = EPIPE;
    507  1.1      cgd 			len -= auio.uio_resid;
    508  1.1      cgd 		}
    509  1.1      cgd errout:
    510  1.1      cgd 		if (error && rep && error != EINTR && error != ERESTART) {
    511  1.1      cgd 			m_freem(*mp);
    512  1.1      cgd 			*mp = (struct mbuf *)0;
    513  1.1      cgd 			if (error != EPIPE && rep)
    514  1.1      cgd 				log(LOG_INFO,
    515  1.1      cgd 				    "receive error %d from nfs server %s\n",
    516  1.1      cgd 				    error,
    517  1.1      cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    518  1.1      cgd 			nfs_disconnect(rep->r_nmp);
    519  1.1      cgd 			error = nfs_reconnect(rep, rep->r_nmp);
    520  1.1      cgd 			if (!error)
    521  1.1      cgd 				goto tryagain;
    522  1.1      cgd 		}
    523  1.1      cgd 	} else {
    524  1.1      cgd 		if (so->so_state & SS_ISCONNECTED)
    525  1.1      cgd 			getnam = (struct mbuf **)0;
    526  1.1      cgd 		else
    527  1.1      cgd 			getnam = aname;
    528  1.1      cgd 		auio.uio_resid = len = 1000000;
    529  1.1      cgd 		do {
    530  1.1      cgd 			rcvflg = 0;
    531  1.1      cgd 			error =  soreceive(so, getnam, &auio, mp,
    532  1.1      cgd 				(struct mbuf **)0, &rcvflg);
    533  1.1      cgd 			if (error == EWOULDBLOCK && rep &&
    534  1.1      cgd 			    (rep->r_flags & R_SOFTTERM))
    535  1.1      cgd 				return (EINTR);
    536  1.1      cgd 		} while (error == EWOULDBLOCK);
    537  1.1      cgd 		len -= auio.uio_resid;
    538  1.1      cgd 	}
    539  1.1      cgd 	if (error) {
    540  1.1      cgd 		m_freem(*mp);
    541  1.1      cgd 		*mp = (struct mbuf *)0;
    542  1.1      cgd 	}
    543  1.1      cgd 	/*
    544  1.1      cgd 	 * Search for any mbufs that are not a multiple of 4 bytes long.
    545  1.1      cgd 	 * These could cause pointer alignment problems, so copy them to
    546  1.1      cgd 	 * well aligned mbufs.
    547  1.1      cgd 	 */
    548  1.1      cgd 	m = *mp;
    549  1.1      cgd 	mbp = mp;
    550  1.1      cgd 	while (m) {
    551  1.1      cgd 		/*
    552  1.1      cgd 		 * All this for something that may never happen.
    553  1.1      cgd 		 */
    554  1.1      cgd 		if (m->m_next && (m->m_len & 0x3)) {
    555  1.1      cgd 			printf("nfs_rcv odd length!\n");
    556  1.1      cgd 			mlen = 0;
    557  1.1      cgd 			while (m) {
    558  1.1      cgd 				fcp = mtod(m, caddr_t);
    559  1.1      cgd 				while (m->m_len > 0) {
    560  1.1      cgd 					if (mlen == 0) {
    561  1.1      cgd 						MGET(m2, M_WAIT, MT_DATA);
    562  1.1      cgd 						if (len >= MINCLSIZE)
    563  1.1      cgd 							MCLGET(m2, M_WAIT);
    564  1.1      cgd 						m2->m_len = 0;
    565  1.1      cgd 						mlen = M_TRAILINGSPACE(m2);
    566  1.1      cgd 						tcp = mtod(m2, caddr_t);
    567  1.1      cgd 						*mbp = m2;
    568  1.1      cgd 						mbp = &m2->m_next;
    569  1.1      cgd 					}
    570  1.1      cgd 					siz = MIN(mlen, m->m_len);
    571  1.1      cgd 					bcopy(fcp, tcp, siz);
    572  1.1      cgd 					m2->m_len += siz;
    573  1.1      cgd 					mlen -= siz;
    574  1.1      cgd 					len -= siz;
    575  1.1      cgd 					tcp += siz;
    576  1.1      cgd 					m->m_len -= siz;
    577  1.1      cgd 					fcp += siz;
    578  1.1      cgd 				}
    579  1.1      cgd 				MFREE(m, mnew);
    580  1.1      cgd 				m = mnew;
    581  1.1      cgd 			}
    582  1.1      cgd 			break;
    583  1.1      cgd 		}
    584  1.1      cgd 		len -= m->m_len;
    585  1.1      cgd 		mbp = &m->m_next;
    586  1.1      cgd 		m = m->m_next;
    587  1.1      cgd 	}
    588  1.1      cgd 	return (error);
    589  1.1      cgd }
    590  1.1      cgd 
    591  1.1      cgd /*
    592  1.1      cgd  * Implement receipt of reply on a socket.
    593  1.1      cgd  * We must search through the list of received datagrams matching them
    594  1.1      cgd  * with outstanding requests using the xid, until ours is found.
    595  1.1      cgd  */
    596  1.1      cgd /* ARGSUSED */
    597  1.1      cgd nfs_reply(nmp, myrep)
    598  1.1      cgd 	struct nfsmount *nmp;
    599  1.1      cgd 	struct nfsreq *myrep;
    600  1.1      cgd {
    601  1.1      cgd 	register struct mbuf *m;
    602  1.1      cgd 	register struct nfsreq *rep;
    603  1.1      cgd 	register int error = 0;
    604  1.1      cgd 	u_long rxid;
    605  1.1      cgd 	struct mbuf *mp, *nam;
    606  1.1      cgd 	char *cp;
    607  1.1      cgd 	int cnt, xfer;
    608  1.1      cgd 
    609  1.1      cgd 	/*
    610  1.1      cgd 	 * Loop around until we get our own reply
    611  1.1      cgd 	 */
    612  1.1      cgd 	for (;;) {
    613  1.1      cgd 		/*
    614  1.1      cgd 		 * Lock against other receivers so that I don't get stuck in
    615  1.1      cgd 		 * sbwait() after someone else has received my reply for me.
    616  1.1      cgd 		 * Also necessary for connection based protocols to avoid
    617  1.1      cgd 		 * race conditions during a reconnect.
    618  1.1      cgd 		 */
    619  1.1      cgd 		nfs_solock(&nmp->nm_flag);
    620  1.1      cgd 		/* Already received, bye bye */
    621  1.1      cgd 		if (myrep->r_mrep != NULL) {
    622  1.1      cgd 			nfs_sounlock(&nmp->nm_flag);
    623  1.1      cgd 			return (0);
    624  1.1      cgd 		}
    625  1.1      cgd 		/*
    626  1.1      cgd 		 * Get the next Rpc reply off the socket
    627  1.1      cgd 		 */
    628  1.1      cgd 		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
    629  1.1      cgd 			nfs_sounlock(&nmp->nm_flag);
    630  1.1      cgd 
    631  1.1      cgd 			/*
    632  1.1      cgd 			 * Ignore routing errors on connectionless protocols??
    633  1.1      cgd 			 */
    634  1.1      cgd 			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
    635  1.1      cgd 				nmp->nm_so->so_error = 0;
    636  1.1      cgd 				continue;
    637  1.1      cgd 			}
    638  1.1      cgd 
    639  1.1      cgd 			/*
    640  1.1      cgd 			 * Otherwise cleanup and return a fatal error.
    641  1.1      cgd 			 */
    642  1.1      cgd 			if (myrep->r_flags & R_TIMING) {
    643  1.1      cgd 				myrep->r_flags &= ~R_TIMING;
    644  1.1      cgd 				nmp->nm_rtt = -1;
    645  1.1      cgd 			}
    646  1.1      cgd 			if (myrep->r_flags & R_SENT) {
    647  1.1      cgd 				myrep->r_flags &= ~R_SENT;
    648  1.1      cgd 				nmp->nm_sent--;
    649  1.1      cgd 			}
    650  1.1      cgd 			return (error);
    651  1.1      cgd 		}
    652  1.1      cgd 
    653  1.1      cgd 		/*
    654  1.1      cgd 		 * Get the xid and check that it is an rpc reply
    655  1.1      cgd 		 */
    656  1.1      cgd 		m = mp;
    657  1.1      cgd 		while (m && m->m_len == 0)
    658  1.1      cgd 			m = m->m_next;
    659  1.1      cgd 		if (m == NULL) {
    660  1.1      cgd 			nfsstats.rpcinvalid++;
    661  1.1      cgd 			m_freem(mp);
    662  1.1      cgd 			nfs_sounlock(&nmp->nm_flag);
    663  1.1      cgd 			continue;
    664  1.1      cgd 		}
    665  1.1      cgd 		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
    666  1.1      cgd 		/*
    667  1.1      cgd 		 * Loop through the request list to match up the reply
    668  1.1      cgd 		 * Iff no match, just drop the datagram
    669  1.1      cgd 		 */
    670  1.1      cgd 		m = mp;
    671  1.1      cgd 		rep = nfsreqh.r_next;
    672  1.1      cgd 		while (rep != &nfsreqh) {
    673  1.1      cgd 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
    674  1.1      cgd 				/* Found it.. */
    675  1.1      cgd 				rep->r_mrep = m;
    676  1.1      cgd 				/*
    677  1.1      cgd 				 * Update timing
    678  1.1      cgd 				 */
    679  1.1      cgd 				if (rep->r_flags & R_TIMING) {
    680  1.1      cgd 					nfs_updatetimer(rep->r_nmp);
    681  1.1      cgd 					rep->r_flags &= ~R_TIMING;
    682  1.1      cgd 					rep->r_nmp->nm_rtt = -1;
    683  1.1      cgd 				}
    684  1.1      cgd 				if (rep->r_flags & R_SENT) {
    685  1.1      cgd 					rep->r_flags &= ~R_SENT;
    686  1.1      cgd 					rep->r_nmp->nm_sent--;
    687  1.1      cgd 				}
    688  1.1      cgd 				break;
    689  1.1      cgd 			}
    690  1.1      cgd 			rep = rep->r_next;
    691  1.1      cgd 		}
    692  1.1      cgd 		nfs_sounlock(&nmp->nm_flag);
    693  1.1      cgd 		if (nam)
    694  1.1      cgd 			m_freem(nam);
    695  1.1      cgd 		/*
    696  1.1      cgd 		 * If not matched to a request, drop it.
    697  1.1      cgd 		 * If it's mine, get out.
    698  1.1      cgd 		 */
    699  1.1      cgd 		if (rep == &nfsreqh) {
    700  1.1      cgd 			nfsstats.rpcunexpected++;
    701  1.1      cgd 			m_freem(m);
    702  1.1      cgd 		} else if (rep == myrep)
    703  1.1      cgd 			return (0);
    704  1.1      cgd 	}
    705  1.1      cgd }
    706  1.1      cgd 
    707  1.1      cgd /*
    708  1.1      cgd  * nfs_request - goes something like this
    709  1.1      cgd  *	- fill in request struct
    710  1.1      cgd  *	- links it into list
    711  1.1      cgd  *	- calls nfs_send() for first transmit
    712  1.1      cgd  *	- calls nfs_receive() to get reply
    713  1.1      cgd  *	- break down rpc header and return with nfs reply pointed to
    714  1.1      cgd  *	  by mrep or error
    715  1.1      cgd  * nb: always frees up mreq mbuf list
    716  1.1      cgd  */
    717  1.1      cgd nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
    718  1.1      cgd 	struct vnode *vp;
    719  1.1      cgd 	struct mbuf *mreq;
    720  1.1      cgd 	u_long xid;
    721  1.1      cgd 	int procnum;
    722  1.1      cgd 	struct proc *procp;
    723  1.1      cgd 	int tryhard;
    724  1.1      cgd 	struct mount *mp;
    725  1.1      cgd 	struct mbuf **mrp;
    726  1.1      cgd 	struct mbuf **mdp;
    727  1.1      cgd 	caddr_t *dposp;
    728  1.1      cgd {
    729  1.1      cgd 	register struct mbuf *m, *mrep;
    730  1.1      cgd 	register struct nfsreq *rep;
    731  1.1      cgd 	register u_long *tl;
    732  1.1      cgd 	register int len;
    733  1.1      cgd 	struct nfsmount *nmp;
    734  1.1      cgd 	struct mbuf *md;
    735  1.1      cgd 	struct nfsreq *reph;
    736  1.1      cgd 	caddr_t dpos;
    737  1.1      cgd 	char *cp2;
    738  1.1      cgd 	int t1;
    739  1.1      cgd 	int s, compressed;
    740  1.1      cgd 	int error = 0;
    741  1.1      cgd 
    742  1.1      cgd 	nmp = VFSTONFS(mp);
    743  1.1      cgd 	m = mreq;
    744  1.1      cgd 	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
    745  1.1      cgd 	rep->r_xid = xid;
    746  1.1      cgd 	rep->r_nmp = nmp;
    747  1.1      cgd 	rep->r_vp = vp;
    748  1.1      cgd 	rep->r_procp = procp;
    749  1.1      cgd 	if ((nmp->nm_flag & NFSMNT_SOFT) ||
    750  1.1      cgd 	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
    751  1.1      cgd 		rep->r_retry = nmp->nm_retry;
    752  1.1      cgd 	else
    753  1.1      cgd 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
    754  1.1      cgd 	rep->r_flags = rep->r_rexmit = 0;
    755  1.1      cgd 	/*
    756  1.1      cgd 	 * Three cases:
    757  1.1      cgd 	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
    758  1.1      cgd 	 * - idempotent requests on SOCK_DGRAM use 0
    759  1.1      cgd 	 * - Reliable transports, NFS_RELIABLETIMEO
    760  1.1      cgd 	 *   Timeouts are still done on reliable transports to ensure detection
    761  1.1      cgd 	 *   of excessive connection delay.
    762  1.1      cgd 	 */
    763  1.1      cgd 	if (nmp->nm_sotype != SOCK_DGRAM)
    764  1.1      cgd 		rep->r_timerinit = -NFS_RELIABLETIMEO;
    765  1.1      cgd 	else if (nonidempotent[procnum])
    766  1.1      cgd 		rep->r_timerinit = -NFS_MINIDEMTIMEO;
    767  1.1      cgd 	else
    768  1.1      cgd 		rep->r_timerinit = 0;
    769  1.1      cgd 	rep->r_timer = rep->r_timerinit;
    770  1.1      cgd 	rep->r_mrep = NULL;
    771  1.1      cgd 	len = 0;
    772  1.1      cgd 	while (m) {
    773  1.1      cgd 		len += m->m_len;
    774  1.1      cgd 		m = m->m_next;
    775  1.1      cgd 	}
    776  1.1      cgd 	mreq->m_pkthdr.len = len;
    777  1.1      cgd 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
    778  1.1      cgd 	compressed = 0;
    779  1.1      cgd 	m = mreq;
    780  1.1      cgd 	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
    781  1.1      cgd 		mreq = nfs_compress(mreq);
    782  1.1      cgd 		if (mreq != m) {
    783  1.1      cgd 			len = mreq->m_pkthdr.len;
    784  1.1      cgd 			compressed++;
    785  1.1      cgd 		}
    786  1.1      cgd 	}
    787  1.1      cgd 	/*
    788  1.1      cgd 	 * For non-atomic protocols, insert a Sun RPC Record Mark.
    789  1.1      cgd 	 */
    790  1.1      cgd 	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
    791  1.1      cgd 		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
    792  1.1      cgd 		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
    793  1.1      cgd 	}
    794  1.1      cgd 	rep->r_mreq = mreq;
    795  1.1      cgd 
    796  1.1      cgd 	/*
    797  1.1      cgd 	 * Do the client side RPC.
    798  1.1      cgd 	 */
    799  1.1      cgd 	nfsstats.rpcrequests++;
    800  1.1      cgd 	/*
    801  1.1      cgd 	 * Chain request into list of outstanding requests. Be sure
    802  1.1      cgd 	 * to put it LAST so timer finds oldest requests first.
    803  1.1      cgd 	 */
    804  1.1      cgd 	s = splnet();
    805  1.1      cgd 	reph = &nfsreqh;
    806  1.1      cgd 	reph->r_prev->r_next = rep;
    807  1.1      cgd 	rep->r_prev = reph->r_prev;
    808  1.1      cgd 	reph->r_prev = rep;
    809  1.1      cgd 	rep->r_next = reph;
    810  1.1      cgd 	/*
    811  1.1      cgd 	 * If backing off another request or avoiding congestion, don't
    812  1.1      cgd 	 * send this one now but let timer do it. If not timing a request,
    813  1.1      cgd 	 * do it now.
    814  1.1      cgd 	 */
    815  1.1      cgd 	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
    816  1.1      cgd 	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
    817  1.1      cgd 		nmp->nm_sent++;
    818  1.1      cgd 		rep->r_flags |= R_SENT;
    819  1.1      cgd 		if (nmp->nm_rtt == -1) {
    820  1.1      cgd 			nmp->nm_rtt = 0;
    821  1.1      cgd 			rep->r_flags |= R_TIMING;
    822  1.1      cgd 		}
    823  1.1      cgd 		splx(s);
    824  1.1      cgd 		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
    825  1.1      cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    826  1.1      cgd 			nfs_solock(&nmp->nm_flag);
    827  1.1      cgd 		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
    828  1.1      cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    829  1.1      cgd 			nfs_sounlock(&nmp->nm_flag);
    830  1.1      cgd 		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
    831  1.1      cgd 			nmp->nm_so->so_error = error = 0;
    832  1.1      cgd 	} else
    833  1.1      cgd 		splx(s);
    834  1.1      cgd 
    835  1.1      cgd 	/*
    836  1.1      cgd 	 * Wait for the reply from our send or the timer's.
    837  1.1      cgd 	 */
    838  1.1      cgd 	if (!error)
    839  1.1      cgd 		error = nfs_reply(nmp, rep);
    840  1.1      cgd 
    841  1.1      cgd 	/*
    842  1.1      cgd 	 * RPC done, unlink the request.
    843  1.1      cgd 	 */
    844  1.1      cgd 	s = splnet();
    845  1.1      cgd 	rep->r_prev->r_next = rep->r_next;
    846  1.1      cgd 	rep->r_next->r_prev = rep->r_prev;
    847  1.1      cgd 	splx(s);
    848  1.1      cgd 
    849  1.1      cgd 	/*
    850  1.1      cgd 	 * If there was a successful reply and a tprintf msg.
    851  1.1      cgd 	 * tprintf a response.
    852  1.1      cgd 	 */
    853  1.1      cgd 	if (!error && (rep->r_flags & R_TPRINTFMSG))
    854  1.1      cgd 		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    855  1.1      cgd 		    "is alive again");
    856  1.1      cgd 	m_freem(rep->r_mreq);
    857  1.1      cgd 	mrep = rep->r_mrep;
    858  1.1      cgd 	FREE((caddr_t)rep, M_NFSREQ);
    859  1.1      cgd 	if (error)
    860  1.1      cgd 		return (error);
    861  1.1      cgd 
    862  1.1      cgd 	if (compressed)
    863  1.1      cgd 		mrep = nfs_uncompress(mrep);
    864  1.1      cgd 	md = mrep;
    865  1.1      cgd 	/*
    866  1.1      cgd 	 * break down the rpc header and check if ok
    867  1.1      cgd 	 */
    868  1.1      cgd 	dpos = mtod(md, caddr_t);
    869  1.1      cgd 	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
    870  1.1      cgd 	tl += 2;
    871  1.1      cgd 	if (*tl++ == rpc_msgdenied) {
    872  1.1      cgd 		if (*tl == rpc_mismatch)
    873  1.1      cgd 			error = EOPNOTSUPP;
    874  1.1      cgd 		else
    875  1.1      cgd 			error = EACCES;
    876  1.1      cgd 		m_freem(mrep);
    877  1.1      cgd 		return (error);
    878  1.1      cgd 	}
    879  1.1      cgd 	/*
    880  1.1      cgd 	 * skip over the auth_verf, someday we may want to cache auth_short's
    881  1.1      cgd 	 * for nfs_reqhead(), but for now just dump it
    882  1.1      cgd 	 */
    883  1.1      cgd 	if (*++tl != 0) {
    884  1.1      cgd 		len = nfsm_rndup(fxdr_unsigned(long, *tl));
    885  1.1      cgd 		nfsm_adv(len);
    886  1.1      cgd 	}
    887  1.1      cgd 	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    888  1.1      cgd 	/* 0 == ok */
    889  1.1      cgd 	if (*tl == 0) {
    890  1.1      cgd 		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    891  1.1      cgd 		if (*tl != 0) {
    892  1.1      cgd 			error = fxdr_unsigned(int, *tl);
    893  1.1      cgd 			m_freem(mrep);
    894  1.1      cgd 			return (error);
    895  1.1      cgd 		}
    896  1.1      cgd 		*mrp = mrep;
    897  1.1      cgd 		*mdp = md;
    898  1.1      cgd 		*dposp = dpos;
    899  1.1      cgd 		return (0);
    900  1.1      cgd 	}
    901  1.1      cgd 	m_freem(mrep);
    902  1.1      cgd 	return (EPROTONOSUPPORT);
    903  1.1      cgd nfsmout:
    904  1.1      cgd 	return (error);
    905  1.1      cgd }
    906  1.1      cgd 
    907  1.1      cgd /*
    908  1.1      cgd  * Get a request for the server main loop
    909  1.1      cgd  * - receive a request via. nfs_soreceive()
    910  1.1      cgd  * - verify it
    911  1.1      cgd  * - fill in the cred struct.
    912  1.1      cgd  */
    913  1.1      cgd nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
    914  1.2      cgd 	msk, mtch, wascomp, repstat)				/* 08 Aug 92*/
    915  1.1      cgd 	struct socket *so;
    916  1.1      cgd 	u_long prog;
    917  1.1      cgd 	u_long vers;
    918  1.1      cgd 	int maxproc;
    919  1.1      cgd 	struct mbuf **nam;
    920  1.1      cgd 	struct mbuf **mrp;
    921  1.1      cgd 	struct mbuf **mdp;
    922  1.1      cgd 	caddr_t *dposp;
    923  1.1      cgd 	u_long *retxid;
    924  1.1      cgd 	u_long *procnum;
    925  1.1      cgd 	register struct ucred *cr;
    926  1.1      cgd 	struct mbuf *msk, *mtch;
    927  1.2      cgd 	int *wascomp, *repstat;					/* 08 Aug 92*/
    928  1.1      cgd {
    929  1.1      cgd 	register int i;
    930  1.1      cgd 	register u_long *tl;
    931  1.1      cgd 	register long t1;
    932  1.1      cgd 	caddr_t dpos, cp2;
    933  1.1      cgd 	int error = 0;
    934  1.1      cgd 	struct mbuf *mrep, *md;
    935  1.1      cgd 	int len;
    936  1.1      cgd 
    937  1.2      cgd 	*repstat = 0;						/* 08 Aug 92*/
    938  1.1      cgd 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    939  1.1      cgd 		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    940  1.1      cgd 	} else {
    941  1.1      cgd 		mrep = (struct mbuf *)0;
    942  1.1      cgd 		do {
    943  1.1      cgd 			if (mrep) {
    944  1.1      cgd 				m_freem(*nam);
    945  1.1      cgd 				m_freem(mrep);
    946  1.1      cgd 			}
    947  1.1      cgd 			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    948  1.1      cgd 		} while (!error && nfs_badnam(*nam, msk, mtch));
    949  1.1      cgd 	}
    950  1.1      cgd 	if (error)
    951  1.1      cgd 		return (error);
    952  1.1      cgd 	md = mrep;
    953  1.1      cgd 	mrep = nfs_uncompress(mrep);
    954  1.1      cgd 	if (mrep != md) {
    955  1.1      cgd 		*wascomp = 1;
    956  1.1      cgd 		md = mrep;
    957  1.1      cgd 	} else
    958  1.1      cgd 		*wascomp = 0;
    959  1.1      cgd 	dpos = mtod(mrep, caddr_t);
    960  1.1      cgd 	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
    961  1.8       ws 	*retxid = fxdr_unsigned(u_long, *tl++);
    962  1.2      cgd 	if (*tl++ != rpc_call || *tl++ != rpc_vers) {		/* 08 Aug 92*/
    963  1.2      cgd 		*mrp = mrep;
    964  1.2      cgd 		*procnum = NFSPROC_NOOP;
    965  1.2      cgd 		*repstat = ERPCMISMATCH;
    966  1.2      cgd 		return (0);
    967  1.1      cgd 	}
    968  1.1      cgd 	if (*tl++ != prog) {
    969  1.2      cgd 		*mrp = mrep;					/* 08 Aug 92*/
    970  1.2      cgd 		*procnum = NFSPROC_NOOP;
    971  1.2      cgd 		*repstat = EPROGUNAVAIL;
    972  1.2      cgd 		return (0);
    973  1.1      cgd 	}
    974  1.1      cgd 	if (*tl++ != vers) {
    975  1.2      cgd 		*mrp = mrep;					/* 08 Aug 92*/
    976  1.2      cgd 		*procnum = NFSPROC_NOOP;
    977  1.2      cgd 		*repstat = EPROGMISMATCH;
    978  1.2      cgd 		return (0);
    979  1.1      cgd 	}
    980  1.1      cgd 	*procnum = fxdr_unsigned(u_long, *tl++);
    981  1.1      cgd 	if (*procnum == NFSPROC_NULL) {
    982  1.1      cgd 		*mrp = mrep;
    983  1.1      cgd 		return (0);
    984  1.1      cgd 	}
    985  1.1      cgd 	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
    986  1.2      cgd 		*mrp = mrep;					/* 08 Aug 92*/
    987  1.2      cgd 		*procnum = NFSPROC_NOOP;
    988  1.2      cgd 		*repstat = EPROCUNAVAIL;
    989  1.2      cgd 		return (0);
    990  1.1      cgd 	}
    991  1.1      cgd 	len = fxdr_unsigned(int, *tl++);
    992  1.1      cgd 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
    993  1.1      cgd 		m_freem(mrep);
    994  1.1      cgd 		return (EBADRPC);
    995  1.1      cgd 	}
    996  1.1      cgd 	len = fxdr_unsigned(int, *++tl);
    997  1.1      cgd 	if (len < 0 || len > NFS_MAXNAMLEN) {
    998  1.1      cgd 		m_freem(mrep);
    999  1.1      cgd 		return (EBADRPC);
   1000  1.1      cgd 	}
   1001  1.1      cgd 	nfsm_adv(nfsm_rndup(len));
   1002  1.1      cgd 	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
   1003  1.1      cgd 	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
   1004  1.1      cgd 	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
   1005  1.1      cgd 	len = fxdr_unsigned(int, *tl);
   1006  1.1      cgd 	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
   1007  1.1      cgd 		m_freem(mrep);
   1008  1.1      cgd 		return (EBADRPC);
   1009  1.1      cgd 	}
   1010  1.1      cgd 	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
   1011  1.1      cgd 	for (i = 1; i <= len; i++)
   1012  1.1      cgd 		if (i < NGROUPS)
   1013  1.1      cgd 			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
   1014  1.1      cgd 		else
   1015  1.1      cgd 			tl++;
   1016  1.1      cgd 	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
   1017  1.1      cgd 	/*
   1018  1.1      cgd 	 * Do we have any use for the verifier.
   1019  1.1      cgd 	 * According to the "Remote Procedure Call Protocol Spec." it
   1020  1.1      cgd 	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
   1021  1.1      cgd 	 * For now, just skip over it
   1022  1.1      cgd 	 */
   1023  1.1      cgd 	len = fxdr_unsigned(int, *++tl);
   1024  1.1      cgd 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
   1025  1.1      cgd 		m_freem(mrep);
   1026  1.1      cgd 		return (EBADRPC);
   1027  1.1      cgd 	}
   1028  1.1      cgd 	if (len > 0)
   1029  1.1      cgd 		nfsm_adv(nfsm_rndup(len));
   1030  1.1      cgd 	*mrp = mrep;
   1031  1.1      cgd 	*mdp = md;
   1032  1.1      cgd 	*dposp = dpos;
   1033  1.1      cgd 	return (0);
   1034  1.1      cgd nfsmout:
   1035  1.1      cgd 	return (error);
   1036  1.1      cgd }
   1037  1.1      cgd 
   1038  1.1      cgd /*
   1039  1.1      cgd  * Generate the rpc reply header
   1040  1.1      cgd  * siz arg. is used to decide if adding a cluster is worthwhile
   1041  1.1      cgd  */
   1042  1.1      cgd nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
   1043  1.1      cgd 	int siz;
   1044  1.1      cgd 	u_long retxid;
   1045  1.1      cgd 	int err;
   1046  1.1      cgd 	struct mbuf **mrq;
   1047  1.1      cgd 	struct mbuf **mbp;
   1048  1.1      cgd 	caddr_t *bposp;
   1049  1.1      cgd {
   1050  1.1      cgd 	register u_long *tl;
   1051  1.1      cgd 	register long t1;
   1052  1.1      cgd 	caddr_t bpos;
   1053  1.1      cgd 	struct mbuf *mreq, *mb, *mb2;
   1054  1.1      cgd 
   1055  1.1      cgd 	NFSMGETHDR(mreq);
   1056  1.1      cgd 	mb = mreq;
   1057  1.1      cgd 	if ((siz+RPC_REPLYSIZ) > MHLEN)
   1058  1.1      cgd 		MCLGET(mreq, M_WAIT);
   1059  1.1      cgd 	tl = mtod(mreq, u_long *);
   1060  1.1      cgd 	mreq->m_len = 6*NFSX_UNSIGNED;
   1061  1.1      cgd 	bpos = ((caddr_t)tl)+mreq->m_len;
   1062  1.8       ws 	*tl++ = txdr_unsigned(retxid);
   1063  1.1      cgd 	*tl++ = rpc_reply;
   1064  1.1      cgd 	if (err == ERPCMISMATCH) {
   1065  1.1      cgd 		*tl++ = rpc_msgdenied;
   1066  1.1      cgd 		*tl++ = rpc_mismatch;
   1067  1.1      cgd 		*tl++ = txdr_unsigned(2);
   1068  1.1      cgd 		*tl = txdr_unsigned(2);
   1069  1.1      cgd 	} else {
   1070  1.1      cgd 		*tl++ = rpc_msgaccepted;
   1071  1.1      cgd 		*tl++ = 0;
   1072  1.1      cgd 		*tl++ = 0;
   1073  1.1      cgd 		switch (err) {
   1074  1.1      cgd 		case EPROGUNAVAIL:
   1075  1.1      cgd 			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
   1076  1.1      cgd 			break;
   1077  1.1      cgd 		case EPROGMISMATCH:
   1078  1.1      cgd 			*tl = txdr_unsigned(RPC_PROGMISMATCH);
   1079  1.1      cgd 			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
   1080  1.1      cgd 			*tl++ = txdr_unsigned(2);
   1081  1.1      cgd 			*tl = txdr_unsigned(2);	/* someday 3 */
   1082  1.1      cgd 			break;
   1083  1.1      cgd 		case EPROCUNAVAIL:
   1084  1.1      cgd 			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
   1085  1.1      cgd 			break;
   1086  1.1      cgd 		default:
   1087  1.1      cgd 			*tl = 0;
   1088  1.1      cgd 			if (err != VNOVAL) {
   1089  1.1      cgd 				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
   1090  1.1      cgd 				*tl = txdr_unsigned(err);
   1091  1.1      cgd 			}
   1092  1.1      cgd 			break;
   1093  1.1      cgd 		};
   1094  1.1      cgd 	}
   1095  1.1      cgd 	*mrq = mreq;
   1096  1.1      cgd 	*mbp = mb;
   1097  1.1      cgd 	*bposp = bpos;
   1098  1.1      cgd 	if (err != 0 && err != VNOVAL)
   1099  1.1      cgd 		nfsstats.srvrpc_errs++;
   1100  1.1      cgd 	return (0);
   1101  1.1      cgd }
   1102  1.1      cgd 
   1103  1.1      cgd /*
   1104  1.1      cgd  * Nfs timer routine
   1105  1.1      cgd  * Scan the nfsreq list and retranmit any requests that have timed out
   1106  1.1      cgd  * To avoid retransmission attempts on STREAM sockets (in the future) make
   1107  1.1      cgd  * sure to set the r_retry field to 0 (implies nm_retry == 0).
   1108  1.1      cgd  */
   1109  1.7  mycroft void
   1110  1.1      cgd nfs_timer()
   1111  1.1      cgd {
   1112  1.1      cgd 	register struct nfsreq *rep;
   1113  1.1      cgd 	register struct mbuf *m;
   1114  1.1      cgd 	register struct socket *so;
   1115  1.1      cgd 	register struct nfsmount *nmp;
   1116  1.1      cgd 	int s, error;
   1117  1.1      cgd 
   1118  1.1      cgd 	s = splnet();
   1119  1.1      cgd 	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
   1120  1.1      cgd 		nmp = rep->r_nmp;
   1121  1.1      cgd 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
   1122  1.1      cgd 		    (so = nmp->nm_so) == NULL)
   1123  1.1      cgd 			continue;
   1124  1.1      cgd 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
   1125  1.1      cgd 			rep->r_flags |= R_SOFTTERM;
   1126  1.1      cgd 			continue;
   1127  1.1      cgd 		}
   1128  1.1      cgd 		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
   1129  1.1      cgd 			nmp->nm_rtt++;
   1130  1.1      cgd 		/* If not timed out */
   1131  1.1      cgd 		if (++rep->r_timer < nmp->nm_rto)
   1132  1.1      cgd 			continue;
   1133  1.1      cgd 		/* Do backoff and save new timeout in mount */
   1134  1.1      cgd 		if (rep->r_flags & R_TIMING) {
   1135  1.1      cgd 			nfs_backofftimer(nmp);
   1136  1.1      cgd 			rep->r_flags &= ~R_TIMING;
   1137  1.1      cgd 			nmp->nm_rtt = -1;
   1138  1.1      cgd 		}
   1139  1.1      cgd 		if (rep->r_flags & R_SENT) {
   1140  1.1      cgd 			rep->r_flags &= ~R_SENT;
   1141  1.1      cgd 			nmp->nm_sent--;
   1142  1.1      cgd 		}
   1143  1.1      cgd 
   1144  1.1      cgd 		/*
   1145  1.1      cgd 		 * Check for too many retries on soft mount.
   1146  1.1      cgd 		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
   1147  1.1      cgd 		 */
   1148  1.1      cgd 		if (++rep->r_rexmit > NFS_MAXREXMIT)
   1149  1.1      cgd 			rep->r_rexmit = NFS_MAXREXMIT;
   1150  1.1      cgd 
   1151  1.1      cgd 		/*
   1152  1.1      cgd 		 * Check for server not responding
   1153  1.1      cgd 		 */
   1154  1.1      cgd 		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
   1155  1.1      cgd 		     rep->r_rexmit > NFS_FISHY) {
   1156  1.1      cgd 			nfs_msg(rep->r_procp,
   1157  1.1      cgd 			    nmp->nm_mountp->mnt_stat.f_mntfromname,
   1158  1.1      cgd 			    "not responding");
   1159  1.1      cgd 			rep->r_flags |= R_TPRINTFMSG;
   1160  1.1      cgd 		}
   1161  1.1      cgd 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
   1162  1.1      cgd 			nfsstats.rpctimeouts++;
   1163  1.1      cgd 			rep->r_flags |= R_SOFTTERM;
   1164  1.1      cgd 			continue;
   1165  1.1      cgd 		}
   1166  1.1      cgd 		if (nmp->nm_sotype != SOCK_DGRAM)
   1167  1.1      cgd 			continue;
   1168  1.1      cgd 
   1169  1.1      cgd 		/*
   1170  1.1      cgd 		 * If there is enough space and the window allows..
   1171  1.1      cgd 		 *	Resend it
   1172  1.1      cgd 		 */
   1173  1.1      cgd 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
   1174  1.1      cgd 		       nmp->nm_sent < nmp->nm_window &&
   1175  1.1      cgd 		       (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
   1176  1.1      cgd 			nfsstats.rpcretries++;
   1177  1.1      cgd 			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
   1178  1.1      cgd 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1179  1.1      cgd 			    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
   1180  1.1      cgd 			else
   1181  1.1      cgd 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1182  1.1      cgd 			    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
   1183  1.1      cgd 			if (error) {
   1184  1.1      cgd 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
   1185  1.1      cgd 					so->so_error = 0;
   1186  1.1      cgd 			} else {
   1187  1.1      cgd 				/*
   1188  1.1      cgd 				 * We need to time the request even though we
   1189  1.1      cgd 				 * are retransmitting.
   1190  1.1      cgd 				 */
   1191  1.1      cgd 				nmp->nm_rtt = 0;
   1192  1.1      cgd 				nmp->nm_sent++;
   1193  1.1      cgd 				rep->r_flags |= (R_SENT|R_TIMING);
   1194  1.1      cgd 				rep->r_timer = rep->r_timerinit;
   1195  1.1      cgd 			}
   1196  1.1      cgd 		}
   1197  1.1      cgd 	}
   1198  1.1      cgd 	splx(s);
   1199  1.1      cgd 	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
   1200  1.1      cgd }
   1201  1.1      cgd 
   1202  1.1      cgd /*
   1203  1.1      cgd  * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
   1204  1.1      cgd  * used here. The timer state is held in the nfsmount structure and
   1205  1.1      cgd  * a single request is used to clock the response. When successful
   1206  1.1      cgd  * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
   1207  1.1      cgd  * is done by nfs_backofftimer. We also log failure messages in these
   1208  1.1      cgd  * routines.
   1209  1.1      cgd  *
   1210  1.1      cgd  * Congestion variables are held in the nfshost structure which
   1211  1.1      cgd  * is referenced by nfsmounts and shared per-server. This separation
   1212  1.1      cgd  * makes it possible to do per-mount timing which allows varying disk
   1213  1.1      cgd  * access times to be dealt with, while preserving a network oriented
   1214  1.1      cgd  * congestion control scheme.
   1215  1.1      cgd  *
   1216  1.1      cgd  * The windowing implements the Jacobson/Karels slowstart algorithm
   1217  1.1      cgd  * with adjusted scaling factors. We start with one request, then send
   1218  1.1      cgd  * 4 more after each success until the ssthresh limit is reached, then
   1219  1.1      cgd  * we increment at a rate proportional to the window. On failure, we
   1220  1.1      cgd  * remember 3/4 the current window and clamp the send limit to 1. Note
   1221  1.1      cgd  * ICMP source quench is not reflected in so->so_error so we ignore that
   1222  1.1      cgd  * for now.
   1223  1.1      cgd  *
   1224  1.1      cgd  * NFS behaves much more like a transport protocol with these changes,
   1225  1.1      cgd  * shedding the teenage pedal-to-the-metal tendencies of "other"
   1226  1.1      cgd  * implementations.
   1227  1.1      cgd  *
   1228  1.1      cgd  * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
   1229  1.1      cgd  */
   1230  1.1      cgd 
   1231  1.1      cgd /*
   1232  1.1      cgd  * The TCP algorithm was not forgiving enough. Because the NFS server
   1233  1.1      cgd  * responds only after performing lookups/diskio/etc, we have to be
   1234  1.1      cgd  * more prepared to accept a spiky variance. The TCP algorithm is:
   1235  1.1      cgd  * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
   1236  1.1      cgd  */
   1237  1.1      cgd #define NFS_RTO(nmp)	(((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
   1238  1.1      cgd 
   1239  1.1      cgd nfs_updatetimer(nmp)
   1240  1.1      cgd 	register struct nfsmount *nmp;
   1241  1.1      cgd {
   1242  1.1      cgd 
   1243  1.1      cgd 	/* If retransmitted, clear and return */
   1244  1.1      cgd 	if (nmp->nm_rexmit || nmp->nm_currexmit) {
   1245  1.1      cgd 		nmp->nm_rexmit = nmp->nm_currexmit = 0;
   1246  1.1      cgd 		return;
   1247  1.1      cgd 	}
   1248  1.1      cgd 	/* If have a measurement, do smoothing */
   1249  1.1      cgd 	if (nmp->nm_srtt) {
   1250  1.1      cgd 		register short delta;
   1251  1.1      cgd 		delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
   1252  1.1      cgd 		if ((nmp->nm_srtt += delta) <= 0)
   1253  1.1      cgd 			nmp->nm_srtt = 1;
   1254  1.1      cgd 		if (delta < 0)
   1255  1.1      cgd 			delta = -delta;
   1256  1.1      cgd 		delta -= (nmp->nm_rttvar >> 2);
   1257  1.1      cgd 		if ((nmp->nm_rttvar += delta) <= 0)
   1258  1.1      cgd 			nmp->nm_rttvar = 1;
   1259  1.1      cgd 	/* Else initialize */
   1260  1.1      cgd 	} else {
   1261  1.1      cgd 		nmp->nm_rttvar = nmp->nm_rtt << 1;
   1262  1.1      cgd 		if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
   1263  1.1      cgd 		nmp->nm_srtt = nmp->nm_rttvar << 2;
   1264  1.1      cgd 	}
   1265  1.1      cgd 	/* Compute new Retransmission TimeOut and clip */
   1266  1.1      cgd 	nmp->nm_rto = NFS_RTO(nmp);
   1267  1.1      cgd 	if (nmp->nm_rto < NFS_MINTIMEO)
   1268  1.1      cgd 		nmp->nm_rto = NFS_MINTIMEO;
   1269  1.1      cgd 	else if (nmp->nm_rto > NFS_MAXTIMEO)
   1270  1.1      cgd 		nmp->nm_rto = NFS_MAXTIMEO;
   1271  1.1      cgd 
   1272  1.1      cgd 	/* Update window estimate */
   1273  1.1      cgd 	if (nmp->nm_window < nmp->nm_ssthresh)	/* quickly */
   1274  1.1      cgd 		nmp->nm_window += 4;
   1275  1.1      cgd 	else {						/* slowly */
   1276  1.1      cgd 		register long incr = ++nmp->nm_winext;
   1277  1.1      cgd 		incr = (incr * incr) / nmp->nm_window;
   1278  1.1      cgd 		if (incr > 0) {
   1279  1.1      cgd 			nmp->nm_winext = 0;
   1280  1.1      cgd 			++nmp->nm_window;
   1281  1.1      cgd 		}
   1282  1.1      cgd 	}
   1283  1.1      cgd 	if (nmp->nm_window > NFS_MAXWINDOW)
   1284  1.1      cgd 		nmp->nm_window = NFS_MAXWINDOW;
   1285  1.1      cgd }
   1286  1.1      cgd 
   1287  1.1      cgd nfs_backofftimer(nmp)
   1288  1.1      cgd 	register struct nfsmount *nmp;
   1289  1.1      cgd {
   1290  1.1      cgd 	register unsigned long newrto;
   1291  1.1      cgd 
   1292  1.1      cgd 	/* Clip shift count */
   1293  1.1      cgd 	if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
   1294  1.1      cgd 		nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
   1295  1.1      cgd 	/* Back off RTO exponentially */
   1296  1.1      cgd 	newrto = NFS_RTO(nmp);
   1297  1.1      cgd 	newrto <<= (nmp->nm_rexmit - 1);
   1298  1.1      cgd 	if (newrto == 0 || newrto > NFS_MAXTIMEO)
   1299  1.1      cgd 		newrto = NFS_MAXTIMEO;
   1300  1.1      cgd 	nmp->nm_rto = newrto;
   1301  1.1      cgd 
   1302  1.1      cgd 	/* If too many retries, message, assume a bogus RTT and re-measure */
   1303  1.1      cgd 	if (nmp->nm_currexmit < nmp->nm_rexmit) {
   1304  1.1      cgd 		nmp->nm_currexmit = nmp->nm_rexmit;
   1305  1.1      cgd 		if (nmp->nm_currexmit >= nfsrexmtthresh) {
   1306  1.1      cgd 			if (nmp->nm_currexmit == nfsrexmtthresh) {
   1307  1.1      cgd 				nmp->nm_rttvar += (nmp->nm_srtt >> 2);
   1308  1.1      cgd 				nmp->nm_srtt = 0;
   1309  1.1      cgd 			}
   1310  1.1      cgd 		}
   1311  1.1      cgd 	}
   1312  1.1      cgd 	/* Close down window but remember this point (3/4 current) for later */
   1313  1.1      cgd 	nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
   1314  1.1      cgd 	nmp->nm_window = 1;
   1315  1.1      cgd 	nmp->nm_winext = 0;
   1316  1.1      cgd }
   1317  1.1      cgd 
   1318  1.1      cgd /*
   1319  1.1      cgd  * Test for a termination signal pending on procp.
   1320  1.1      cgd  * This is used for NFSMNT_INT mounts.
   1321  1.1      cgd  */
   1322  1.1      cgd nfs_sigintr(p)
   1323  1.1      cgd 	register struct proc *p;
   1324  1.1      cgd {
   1325  1.1      cgd 	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
   1326  1.1      cgd 	    NFSINT_SIGMASK))
   1327  1.1      cgd 		return (1);
   1328  1.1      cgd 	else
   1329  1.1      cgd 		return (0);
   1330  1.1      cgd }
   1331  1.1      cgd 
   1332  1.1      cgd nfs_msg(p, server, msg)
   1333  1.1      cgd 	struct proc *p;
   1334  1.1      cgd 	char *server, *msg;
   1335  1.1      cgd {
   1336  1.1      cgd 	tpr_t tpr;
   1337  1.1      cgd 
   1338  1.1      cgd 	if (p)
   1339  1.1      cgd 		tpr = tprintf_open(p);
   1340  1.1      cgd 	else
   1341  1.1      cgd 		tpr = NULL;
   1342  1.1      cgd 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1343  1.1      cgd 	tprintf_close(tpr);
   1344  1.1      cgd }
   1345  1.1      cgd 
   1346  1.1      cgd /*
   1347  1.1      cgd  * Lock a socket against others.
   1348  1.1      cgd  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1349  1.1      cgd  * and also to avoid race conditions between the processes with nfs requests
   1350  1.1      cgd  * in progress when a reconnect is necessary.
   1351  1.1      cgd  */
   1352  1.1      cgd nfs_solock(flagp)
   1353  1.1      cgd 	register int *flagp;
   1354  1.1      cgd {
   1355  1.1      cgd 
   1356  1.1      cgd 	while (*flagp & NFSMNT_SCKLOCK) {
   1357  1.1      cgd 		*flagp |= NFSMNT_WANTSCK;
   1358  1.1      cgd 		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
   1359  1.1      cgd 	}
   1360  1.1      cgd 	*flagp |= NFSMNT_SCKLOCK;
   1361  1.1      cgd }
   1362  1.1      cgd 
   1363  1.1      cgd /*
   1364  1.1      cgd  * Unlock the stream socket for others.
   1365  1.1      cgd  */
   1366  1.1      cgd nfs_sounlock(flagp)
   1367  1.1      cgd 	register int *flagp;
   1368  1.1      cgd {
   1369  1.1      cgd 
   1370  1.1      cgd 	if ((*flagp & NFSMNT_SCKLOCK) == 0)
   1371  1.1      cgd 		panic("nfs sounlock");
   1372  1.1      cgd 	*flagp &= ~NFSMNT_SCKLOCK;
   1373  1.1      cgd 	if (*flagp & NFSMNT_WANTSCK) {
   1374  1.1      cgd 		*flagp &= ~NFSMNT_WANTSCK;
   1375  1.1      cgd 		wakeup((caddr_t)flagp);
   1376  1.1      cgd 	}
   1377  1.1      cgd }
   1378  1.1      cgd 
   1379  1.1      cgd /*
   1380  1.1      cgd  * This function compares two net addresses by family and returns TRUE
   1381  1.1      cgd  * if they are the same.
   1382  1.1      cgd  * If there is any doubt, return FALSE.
   1383  1.1      cgd  */
   1384  1.1      cgd nfs_netaddr_match(nam1, nam2)
   1385  1.1      cgd 	struct mbuf *nam1, *nam2;
   1386  1.1      cgd {
   1387  1.1      cgd 	register struct sockaddr *saddr1, *saddr2;
   1388  1.1      cgd 
   1389  1.1      cgd 	saddr1 = mtod(nam1, struct sockaddr *);
   1390  1.1      cgd 	saddr2 = mtod(nam2, struct sockaddr *);
   1391  1.1      cgd 	if (saddr1->sa_family != saddr2->sa_family)
   1392  1.1      cgd 		return (0);
   1393  1.1      cgd 
   1394  1.1      cgd 	/*
   1395  1.1      cgd 	 * Must do each address family separately since unused fields
   1396  1.1      cgd 	 * are undefined values and not always zeroed.
   1397  1.1      cgd 	 */
   1398  1.1      cgd 	switch (saddr1->sa_family) {
   1399  1.1      cgd 	case AF_INET:
   1400  1.1      cgd 		if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
   1401  1.1      cgd 		    ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
   1402  1.1      cgd 			return (1);
   1403  1.1      cgd 		break;
   1404  1.1      cgd 	default:
   1405  1.1      cgd 		break;
   1406  1.1      cgd 	};
   1407  1.1      cgd 	return (0);
   1408  1.1      cgd }
   1409  1.1      cgd 
   1410  1.1      cgd /*
   1411  1.1      cgd  * Check the hostname fields for nfsd's mask and match fields.
   1412  1.1      cgd  * By address family:
   1413  1.1      cgd  * - Bitwise AND the mask with the host address field
   1414  1.1      cgd  * - Compare for == with match
   1415  1.1      cgd  * return TRUE if not equal
   1416  1.1      cgd  */
   1417  1.1      cgd nfs_badnam(nam, msk, mtch)
   1418  1.1      cgd 	register struct mbuf *nam, *msk, *mtch;
   1419  1.1      cgd {
   1420  1.1      cgd 	switch (mtod(nam, struct sockaddr *)->sa_family) {
   1421  1.1      cgd 	case AF_INET:
   1422  1.1      cgd 		return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
   1423  1.1      cgd 			 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
   1424  1.1      cgd 			 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
   1425  1.1      cgd 	default:
   1426  1.1      cgd 		printf("nfs_badmatch, unknown sa_family\n");
   1427  1.1      cgd 		return (0);
   1428  1.1      cgd 	};
   1429  1.1      cgd }
   1430