Home | History | Annotate | Line # | Download | only in nfs
nfs_socket.c revision 1.3
      1  1.1    cgd /*
      2  1.1    cgd  * Copyright (c) 1989, 1991 The Regents of the University of California.
      3  1.1    cgd  * All rights reserved.
      4  1.1    cgd  *
      5  1.1    cgd  * This code is derived from software contributed to Berkeley by
      6  1.1    cgd  * Rick Macklem at The University of Guelph.
      7  1.1    cgd  *
      8  1.1    cgd  * Redistribution and use in source and binary forms, with or without
      9  1.1    cgd  * modification, are permitted provided that the following conditions
     10  1.1    cgd  * are met:
     11  1.1    cgd  * 1. Redistributions of source code must retain the above copyright
     12  1.1    cgd  *    notice, this list of conditions and the following disclaimer.
     13  1.1    cgd  * 2. Redistributions in binary form must reproduce the above copyright
     14  1.1    cgd  *    notice, this list of conditions and the following disclaimer in the
     15  1.1    cgd  *    documentation and/or other materials provided with the distribution.
     16  1.1    cgd  * 3. All advertising materials mentioning features or use of this software
     17  1.1    cgd  *    must display the following acknowledgement:
     18  1.1    cgd  *	This product includes software developed by the University of
     19  1.1    cgd  *	California, Berkeley and its contributors.
     20  1.1    cgd  * 4. Neither the name of the University nor the names of its contributors
     21  1.1    cgd  *    may be used to endorse or promote products derived from this software
     22  1.1    cgd  *    without specific prior written permission.
     23  1.1    cgd  *
     24  1.1    cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  1.1    cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  1.1    cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  1.1    cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  1.1    cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  1.1    cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  1.1    cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  1.1    cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  1.1    cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  1.1    cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  1.1    cgd  * SUCH DAMAGE.
     35  1.1    cgd  *
     36  1.1    cgd  *	@(#)nfs_socket.c	7.23 (Berkeley) 4/20/91
     37  1.2    cgd  *
     38  1.2    cgd  * PATCHES MAGIC                LEVEL   PATCH THAT GOT US HERE
     39  1.2    cgd  * --------------------         -----   ----------------------
     40  1.2    cgd  * CURRENT PATCH LEVEL:         1       00053
     41  1.2    cgd  * --------------------         -----   ----------------------
     42  1.2    cgd  *
     43  1.2    cgd  * 08 Sep 92    Rick "gopher I"         Fix "reserved port" bug, fixed for
     44  1.2    cgd  *						AIX3.2 NFS clients
     45  1.1    cgd  */
     46  1.1    cgd 
     47  1.1    cgd /*
     48  1.1    cgd  * Socket operations for use by nfs
     49  1.1    cgd  */
     50  1.1    cgd 
     51  1.1    cgd #include "param.h"
     52  1.1    cgd #include "proc.h"
     53  1.1    cgd #include "mount.h"
     54  1.1    cgd #include "kernel.h"
     55  1.1    cgd #include "malloc.h"
     56  1.1    cgd #include "mbuf.h"
     57  1.1    cgd #include "namei.h"
     58  1.1    cgd #include "vnode.h"
     59  1.1    cgd #include "domain.h"
     60  1.1    cgd #include "protosw.h"
     61  1.1    cgd #include "socket.h"
     62  1.1    cgd #include "socketvar.h"
     63  1.1    cgd #include "syslog.h"
     64  1.1    cgd #include "tprintf.h"
     65  1.1    cgd #include "../netinet/in.h"
     66  1.1    cgd #include "../netinet/tcp.h"
     67  1.1    cgd 
     68  1.1    cgd #include "rpcv2.h"
     69  1.1    cgd #include "nfsv2.h"
     70  1.1    cgd #include "nfs.h"
     71  1.1    cgd #include "xdr_subs.h"
     72  1.1    cgd #include "nfsm_subs.h"
     73  1.1    cgd #include "nfsmount.h"
     74  1.1    cgd 
     75  1.1    cgd #define	TRUE	1
     76  1.1    cgd #define	FALSE	0
     77  1.1    cgd 
     78  1.1    cgd /*
     79  1.1    cgd  * External data, mostly RPC constants in XDR form
     80  1.1    cgd  */
     81  1.1    cgd extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
     82  1.1    cgd 	rpc_msgaccepted, rpc_call;
     83  1.1    cgd extern u_long nfs_prog, nfs_vers;
     84  1.1    cgd /* Maybe these should be bits in a u_long ?? */
     85  1.3  glass /*
     86  1.3  glass  * Static array that defines which nfs rpc's are nonidempotent
     87  1.3  glass  */
     88  1.3  glass int nonidempotent[NFS_NPROCS] = {
     89  1.3  glass 	FALSE,
     90  1.3  glass 	FALSE,
     91  1.3  glass 	TRUE,
     92  1.3  glass 	FALSE,
     93  1.3  glass 	FALSE,
     94  1.3  glass 	FALSE,
     95  1.3  glass 	FALSE,
     96  1.3  glass 	FALSE,
     97  1.3  glass 	TRUE,
     98  1.3  glass 	TRUE,
     99  1.3  glass 	TRUE,
    100  1.3  glass 	TRUE,
    101  1.3  glass 	TRUE,
    102  1.3  glass 	TRUE,
    103  1.3  glass 	TRUE,
    104  1.3  glass 	TRUE,
    105  1.3  glass 	FALSE,
    106  1.3  glass 	FALSE,
    107  1.3  glass };
    108  1.1    cgd static int compressrequest[NFS_NPROCS] = {
    109  1.1    cgd 	FALSE,
    110  1.1    cgd 	TRUE,
    111  1.1    cgd 	TRUE,
    112  1.1    cgd 	FALSE,
    113  1.1    cgd 	TRUE,
    114  1.1    cgd 	TRUE,
    115  1.1    cgd 	TRUE,
    116  1.1    cgd 	FALSE,
    117  1.1    cgd 	FALSE,
    118  1.1    cgd 	TRUE,
    119  1.1    cgd 	TRUE,
    120  1.1    cgd 	TRUE,
    121  1.1    cgd 	TRUE,
    122  1.1    cgd 	TRUE,
    123  1.1    cgd 	TRUE,
    124  1.1    cgd 	TRUE,
    125  1.1    cgd 	TRUE,
    126  1.1    cgd 	TRUE,
    127  1.1    cgd };
    128  1.1    cgd int	nfs_sbwait();
    129  1.1    cgd void	nfs_disconnect();
    130  1.1    cgd struct mbuf *nfs_compress(), *nfs_uncompress();
    131  1.1    cgd 
    132  1.1    cgd 
    133  1.1    cgd struct nfsreq nfsreqh;
    134  1.1    cgd int nfsrexmtthresh = NFS_FISHY;
    135  1.1    cgd int nfs_tcpnodelay = 1;
    136  1.1    cgd 
    137  1.1    cgd /*
    138  1.1    cgd  * Initialize sockets and congestion for a new NFS connection.
    139  1.1    cgd  * We do not free the sockaddr if error.
    140  1.1    cgd  */
    141  1.1    cgd nfs_connect(nmp)
    142  1.1    cgd 	register struct nfsmount *nmp;
    143  1.1    cgd {
    144  1.1    cgd 	register struct socket *so;
    145  1.2    cgd 	struct sockaddr *saddr;					/* 08 Sep 92*/
    146  1.1    cgd 	int s, error, bufsize;
    147  1.1    cgd 	struct mbuf *m;
    148  1.2    cgd 	struct sockaddr_in *sin;				/* 08 Sep 92*/
    149  1.2    cgd 	u_short tport;						/* 08 Sep 92*/
    150  1.1    cgd 
    151  1.1    cgd 	nmp->nm_so = (struct socket *)0;
    152  1.2    cgd 	saddr = mtod(nmp->nm_nam, struct sockaddr *);		/* 08 Sep 92*/
    153  1.2    cgd 	if (error = socreate(saddr->sa_family,			/* 08 Sep 92*/
    154  1.1    cgd 		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
    155  1.1    cgd 		goto bad;
    156  1.1    cgd 	so = nmp->nm_so;
    157  1.1    cgd 	nmp->nm_soflags = so->so_proto->pr_flags;
    158  1.1    cgd 
    159  1.2    cgd 	/*
    160  1.2    cgd 	 * 08 Sep 92
    161  1.2    cgd 	 *
    162  1.2    cgd 	 * Some servers require that the client port be a reserved port number.
    163  1.2    cgd 	 */
    164  1.2    cgd 	if (saddr->sa_family == AF_INET) {
    165  1.2    cgd 		MGET(m, M_WAIT, MT_SONAME);
    166  1.2    cgd 		sin = mtod(m, struct sockaddr_in *);
    167  1.2    cgd 		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
    168  1.2    cgd 		sin->sin_family = AF_INET;
    169  1.2    cgd 		sin->sin_addr.s_addr = INADDR_ANY;
    170  1.2    cgd 		tport = IPPORT_RESERVED - 1;
    171  1.2    cgd 		sin->sin_port = htons(tport);
    172  1.2    cgd 		while (sobind(so, m) == EADDRINUSE &&
    173  1.2    cgd 		       --tport > IPPORT_RESERVED / 2)
    174  1.2    cgd 			sin->sin_port = htons(tport);
    175  1.2    cgd 		m_freem(m);
    176  1.2    cgd 	}
    177  1.2    cgd 
    178  1.1    cgd 	if (nmp->nm_sotype == SOCK_DGRAM)
    179  1.1    cgd 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
    180  1.1    cgd 		    NFS_MAXPACKET);
    181  1.1    cgd 	else
    182  1.1    cgd 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
    183  1.1    cgd 		    NFS_MAXPACKET + sizeof(u_long));
    184  1.1    cgd 	if (error = soreserve(so, bufsize, bufsize))
    185  1.1    cgd 		goto bad;
    186  1.1    cgd 
    187  1.1    cgd 	/*
    188  1.1    cgd 	 * Protocols that do not require connections may be optionally left
    189  1.1    cgd 	 * unconnected for servers that reply from a port other than NFS_PORT.
    190  1.1    cgd 	 */
    191  1.1    cgd 	if (nmp->nm_flag & NFSMNT_NOCONN) {
    192  1.1    cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
    193  1.1    cgd 			error = ENOTCONN;
    194  1.1    cgd 			goto bad;
    195  1.1    cgd 		}
    196  1.1    cgd 	} else {
    197  1.1    cgd 		if (error = soconnect(so, nmp->nm_nam))
    198  1.1    cgd 			goto bad;
    199  1.1    cgd 
    200  1.1    cgd 		/*
    201  1.1    cgd 		 * Wait for the connection to complete. Cribbed from the
    202  1.1    cgd 		 * connect system call but with the wait at negative prio.
    203  1.1    cgd 		 */
    204  1.1    cgd 		s = splnet();
    205  1.1    cgd 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
    206  1.1    cgd 			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
    207  1.1    cgd 		splx(s);
    208  1.1    cgd 		if (so->so_error) {
    209  1.1    cgd 			error = so->so_error;
    210  1.1    cgd 			goto bad;
    211  1.1    cgd 		}
    212  1.1    cgd 	}
    213  1.1    cgd 	if (nmp->nm_sotype == SOCK_DGRAM) {
    214  1.1    cgd 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    215  1.1    cgd 			so->so_rcv.sb_timeo = (5 * hz);
    216  1.1    cgd 			so->so_snd.sb_timeo = (5 * hz);
    217  1.1    cgd 		} else {
    218  1.1    cgd 			so->so_rcv.sb_timeo = 0;
    219  1.1    cgd 			so->so_snd.sb_timeo = 0;
    220  1.1    cgd 		}
    221  1.1    cgd 		nmp->nm_rto = NFS_TIMEO;
    222  1.1    cgd 	} else {
    223  1.1    cgd 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    224  1.1    cgd 			so->so_rcv.sb_timeo = (5 * hz);
    225  1.1    cgd 			so->so_snd.sb_timeo = (5 * hz);
    226  1.1    cgd 		} else {
    227  1.1    cgd 			so->so_rcv.sb_timeo = 0;
    228  1.1    cgd 			so->so_snd.sb_timeo = 0;
    229  1.1    cgd 		}
    230  1.1    cgd 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    231  1.1    cgd 			MGET(m, M_WAIT, MT_SOOPTS);
    232  1.1    cgd 			*mtod(m, int *) = 1;
    233  1.1    cgd 			m->m_len = sizeof(int);
    234  1.1    cgd 			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
    235  1.1    cgd 		}
    236  1.1    cgd 		if (so->so_proto->pr_domain->dom_family == AF_INET &&
    237  1.1    cgd 		    so->so_proto->pr_protocol == IPPROTO_TCP &&
    238  1.1    cgd 		    nfs_tcpnodelay) {
    239  1.1    cgd 			MGET(m, M_WAIT, MT_SOOPTS);
    240  1.1    cgd 			*mtod(m, int *) = 1;
    241  1.1    cgd 			m->m_len = sizeof(int);
    242  1.1    cgd 			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
    243  1.1    cgd 		}
    244  1.1    cgd 		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
    245  1.1    cgd 	}
    246  1.1    cgd 	so->so_rcv.sb_flags |= SB_NOINTR;
    247  1.1    cgd 	so->so_snd.sb_flags |= SB_NOINTR;
    248  1.1    cgd 
    249  1.1    cgd 	/* Initialize other non-zero congestion variables */
    250  1.1    cgd 	nmp->nm_window = 2;			/* Initial send window */
    251  1.1    cgd 	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
    252  1.1    cgd 	nmp->nm_rttvar = nmp->nm_rto << 1;
    253  1.1    cgd 	nmp->nm_sent = 0;
    254  1.1    cgd 	nmp->nm_currexmit = 0;
    255  1.1    cgd 	return (0);
    256  1.1    cgd 
    257  1.1    cgd bad:
    258  1.1    cgd 	nfs_disconnect(nmp);
    259  1.1    cgd 	return (error);
    260  1.1    cgd }
    261  1.1    cgd 
    262  1.1    cgd /*
    263  1.1    cgd  * Reconnect routine:
    264  1.1    cgd  * Called when a connection is broken on a reliable protocol.
    265  1.1    cgd  * - clean up the old socket
    266  1.1    cgd  * - nfs_connect() again
    267  1.1    cgd  * - set R_MUSTRESEND for all outstanding requests on mount point
    268  1.1    cgd  * If this fails the mount point is DEAD!
    269  1.1    cgd  * nb: Must be called with the nfs_solock() set on the mount point.
    270  1.1    cgd  */
    271  1.1    cgd nfs_reconnect(rep, nmp)
    272  1.1    cgd 	register struct nfsreq *rep;
    273  1.1    cgd 	register struct nfsmount *nmp;
    274  1.1    cgd {
    275  1.1    cgd 	register struct nfsreq *rp;
    276  1.1    cgd 	int error;
    277  1.1    cgd 
    278  1.1    cgd 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    279  1.1    cgd 	    "trying reconnect");
    280  1.1    cgd 	while (error = nfs_connect(nmp)) {
    281  1.1    cgd #ifdef lint
    282  1.1    cgd 		error = error;
    283  1.1    cgd #endif /* lint */
    284  1.1    cgd 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
    285  1.1    cgd 			return (EINTR);
    286  1.1    cgd 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
    287  1.1    cgd 	}
    288  1.1    cgd 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    289  1.1    cgd 	    "reconnected");
    290  1.1    cgd 
    291  1.1    cgd 	/*
    292  1.1    cgd 	 * Loop through outstanding request list and fix up all requests
    293  1.1    cgd 	 * on old socket.
    294  1.1    cgd 	 */
    295  1.1    cgd 	rp = nfsreqh.r_next;
    296  1.1    cgd 	while (rp != &nfsreqh) {
    297  1.1    cgd 		if (rp->r_nmp == nmp)
    298  1.1    cgd 			rp->r_flags |= R_MUSTRESEND;
    299  1.1    cgd 		rp = rp->r_next;
    300  1.1    cgd 	}
    301  1.1    cgd 	return (0);
    302  1.1    cgd }
    303  1.1    cgd 
    304  1.1    cgd /*
    305  1.1    cgd  * NFS disconnect. Clean up and unlink.
    306  1.1    cgd  */
    307  1.1    cgd void
    308  1.1    cgd nfs_disconnect(nmp)
    309  1.1    cgd 	register struct nfsmount *nmp;
    310  1.1    cgd {
    311  1.1    cgd 	register struct socket *so;
    312  1.1    cgd 
    313  1.1    cgd 	if (nmp->nm_so) {
    314  1.1    cgd 		so = nmp->nm_so;
    315  1.1    cgd 		nmp->nm_so = (struct socket *)0;
    316  1.1    cgd 		soshutdown(so, 2);
    317  1.1    cgd 		soclose(so);
    318  1.1    cgd 	}
    319  1.1    cgd }
    320  1.1    cgd 
    321  1.1    cgd /*
    322  1.1    cgd  * This is the nfs send routine. For connection based socket types, it
    323  1.1    cgd  * must be called with an nfs_solock() on the socket.
    324  1.1    cgd  * "rep == NULL" indicates that it has been called from a server.
    325  1.1    cgd  */
    326  1.1    cgd nfs_send(so, nam, top, rep)
    327  1.1    cgd 	register struct socket *so;
    328  1.1    cgd 	struct mbuf *nam;
    329  1.1    cgd 	register struct mbuf *top;
    330  1.1    cgd 	struct nfsreq *rep;
    331  1.1    cgd {
    332  1.1    cgd 	struct mbuf *sendnam;
    333  1.1    cgd 	int error, soflags;
    334  1.1    cgd 
    335  1.1    cgd 	if (rep) {
    336  1.1    cgd 		if (rep->r_flags & R_SOFTTERM) {
    337  1.1    cgd 			m_freem(top);
    338  1.1    cgd 			return (EINTR);
    339  1.1    cgd 		}
    340  1.1    cgd 		if (rep->r_nmp->nm_so == NULL &&
    341  1.1    cgd 		    (error = nfs_reconnect(rep, rep->r_nmp)))
    342  1.1    cgd 			return (error);
    343  1.1    cgd 		rep->r_flags &= ~R_MUSTRESEND;
    344  1.1    cgd 		so = rep->r_nmp->nm_so;
    345  1.1    cgd 		soflags = rep->r_nmp->nm_soflags;
    346  1.1    cgd 	} else
    347  1.1    cgd 		soflags = so->so_proto->pr_flags;
    348  1.1    cgd 	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
    349  1.1    cgd 		sendnam = (struct mbuf *)0;
    350  1.1    cgd 	else
    351  1.1    cgd 		sendnam = nam;
    352  1.1    cgd 
    353  1.1    cgd 	error = sosend(so, sendnam, (struct uio *)0, top,
    354  1.1    cgd 		(struct mbuf *)0, 0);
    355  1.1    cgd 	if (error == EWOULDBLOCK && rep) {
    356  1.1    cgd 		if (rep->r_flags & R_SOFTTERM)
    357  1.1    cgd 			error = EINTR;
    358  1.1    cgd 		else {
    359  1.1    cgd 			rep->r_flags |= R_MUSTRESEND;
    360  1.1    cgd 			error = 0;
    361  1.1    cgd 		}
    362  1.1    cgd 	}
    363  1.1    cgd 	/*
    364  1.1    cgd 	 * Ignore socket errors??
    365  1.1    cgd 	 */
    366  1.1    cgd 	if (error && error != EINTR && error != ERESTART)
    367  1.1    cgd 		error = 0;
    368  1.1    cgd 	return (error);
    369  1.1    cgd }
    370  1.1    cgd 
    371  1.1    cgd /*
    372  1.1    cgd  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    373  1.1    cgd  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    374  1.1    cgd  * Mark and consolidate the data into a new mbuf list.
    375  1.1    cgd  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    376  1.1    cgd  *     small mbufs.
    377  1.1    cgd  * For SOCK_STREAM we must be very careful to read an entire record once
    378  1.1    cgd  * we have read any of it, even if the system call has been interrupted.
    379  1.1    cgd  */
    380  1.1    cgd nfs_receive(so, aname, mp, rep)
    381  1.1    cgd 	register struct socket *so;
    382  1.1    cgd 	struct mbuf **aname;
    383  1.1    cgd 	struct mbuf **mp;
    384  1.1    cgd 	register struct nfsreq *rep;
    385  1.1    cgd {
    386  1.1    cgd 	struct uio auio;
    387  1.1    cgd 	struct iovec aio;
    388  1.1    cgd 	register struct mbuf *m;
    389  1.1    cgd 	struct mbuf *m2, *mnew, **mbp;
    390  1.1    cgd 	caddr_t fcp, tcp;
    391  1.1    cgd 	u_long len;
    392  1.1    cgd 	struct mbuf **getnam;
    393  1.1    cgd 	int error, siz, mlen, soflags, rcvflg;
    394  1.1    cgd 
    395  1.1    cgd 	/*
    396  1.1    cgd 	 * Set up arguments for soreceive()
    397  1.1    cgd 	 */
    398  1.1    cgd 	*mp = (struct mbuf *)0;
    399  1.1    cgd 	*aname = (struct mbuf *)0;
    400  1.1    cgd 	if (rep)
    401  1.1    cgd 		soflags = rep->r_nmp->nm_soflags;
    402  1.1    cgd 	else
    403  1.1    cgd 		soflags = so->so_proto->pr_flags;
    404  1.1    cgd 
    405  1.1    cgd 	/*
    406  1.1    cgd 	 * For reliable protocols, lock against other senders/receivers
    407  1.1    cgd 	 * in case a reconnect is necessary.
    408  1.1    cgd 	 * For SOCK_STREAM, first get the Record Mark to find out how much
    409  1.1    cgd 	 * more there is to get.
    410  1.1    cgd 	 * We must lock the socket against other receivers
    411  1.1    cgd 	 * until we have an entire rpc request/reply.
    412  1.1    cgd 	 */
    413  1.1    cgd 	if (soflags & PR_CONNREQUIRED) {
    414  1.1    cgd tryagain:
    415  1.1    cgd 		/*
    416  1.1    cgd 		 * Check for fatal errors and resending request.
    417  1.1    cgd 		 */
    418  1.1    cgd 		if (rep) {
    419  1.1    cgd 			/*
    420  1.1    cgd 			 * Ugh: If a reconnect attempt just happened, nm_so
    421  1.1    cgd 			 * would have changed. NULL indicates a failed
    422  1.1    cgd 			 * attempt that has essentially shut down this
    423  1.1    cgd 			 * mount point.
    424  1.1    cgd 			 */
    425  1.1    cgd 			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
    426  1.1    cgd 				(rep->r_flags & R_SOFTTERM))
    427  1.1    cgd 				return (EINTR);
    428  1.1    cgd 			while (rep->r_flags & R_MUSTRESEND) {
    429  1.1    cgd 				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
    430  1.1    cgd 				nfsstats.rpcretries++;
    431  1.1    cgd 				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
    432  1.1    cgd 					rep))
    433  1.1    cgd 					goto errout;
    434  1.1    cgd 			}
    435  1.1    cgd 		}
    436  1.1    cgd 		if ((soflags & PR_ATOMIC) == 0) {
    437  1.1    cgd 			aio.iov_base = (caddr_t) &len;
    438  1.1    cgd 			aio.iov_len = sizeof(u_long);
    439  1.1    cgd 			auio.uio_iov = &aio;
    440  1.1    cgd 			auio.uio_iovcnt = 1;
    441  1.1    cgd 			auio.uio_segflg = UIO_SYSSPACE;
    442  1.1    cgd 			auio.uio_rw = UIO_READ;
    443  1.1    cgd 			auio.uio_procp = (struct proc *)0;
    444  1.1    cgd 			auio.uio_offset = 0;
    445  1.1    cgd 			auio.uio_resid = sizeof(u_long);
    446  1.1    cgd 			do {
    447  1.1    cgd 			    rcvflg = MSG_WAITALL;
    448  1.1    cgd 			    error = soreceive(so, (struct mbuf **)0, &auio,
    449  1.1    cgd 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
    450  1.1    cgd 			    if (error == EWOULDBLOCK && rep) {
    451  1.1    cgd 				if (rep->r_flags & R_SOFTTERM)
    452  1.1    cgd 					return (EINTR);
    453  1.1    cgd 				if (rep->r_flags & R_MUSTRESEND)
    454  1.1    cgd 					goto tryagain;
    455  1.1    cgd 			    }
    456  1.1    cgd 			} while (error == EWOULDBLOCK);
    457  1.1    cgd 			if (!error && auio.uio_resid > 0) {
    458  1.1    cgd 			    if (rep)
    459  1.1    cgd 				log(LOG_INFO,
    460  1.1    cgd 				   "short receive (%d/%d) from nfs server %s\n",
    461  1.1    cgd 				   sizeof(u_long) - auio.uio_resid,
    462  1.1    cgd 				   sizeof(u_long),
    463  1.1    cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    464  1.1    cgd 			    error = EPIPE;
    465  1.1    cgd 			}
    466  1.1    cgd 			if (error)
    467  1.1    cgd 				goto errout;
    468  1.1    cgd 			len = ntohl(len) & ~0x80000000;
    469  1.1    cgd 			/*
    470  1.1    cgd 			 * This is SERIOUS! We are out of sync with the sender
    471  1.1    cgd 			 * and forcing a disconnect/reconnect is all I can do.
    472  1.1    cgd 			 */
    473  1.1    cgd 			if (len > NFS_MAXPACKET) {
    474  1.1    cgd 			    if (rep)
    475  1.1    cgd 				log(LOG_ERR, "%s (%d) from nfs server %s\n",
    476  1.1    cgd 				    "impossible packet length",
    477  1.1    cgd 				    len,
    478  1.1    cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    479  1.1    cgd 			    error = EFBIG;
    480  1.1    cgd 			    goto errout;
    481  1.1    cgd 			}
    482  1.1    cgd 			auio.uio_resid = len;
    483  1.1    cgd 			do {
    484  1.1    cgd 			    rcvflg = MSG_WAITALL;
    485  1.1    cgd 			    error =  soreceive(so, (struct mbuf **)0,
    486  1.1    cgd 				&auio, mp, (struct mbuf **)0, &rcvflg);
    487  1.1    cgd 			} while (error == EWOULDBLOCK || error == EINTR ||
    488  1.1    cgd 				 error == ERESTART);
    489  1.1    cgd 			if (!error && auio.uio_resid > 0) {
    490  1.1    cgd 			    if (rep)
    491  1.1    cgd 				log(LOG_INFO,
    492  1.1    cgd 				   "short receive (%d/%d) from nfs server %s\n",
    493  1.1    cgd 				   len - auio.uio_resid, len,
    494  1.1    cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    495  1.1    cgd 			    error = EPIPE;
    496  1.1    cgd 			}
    497  1.1    cgd 		} else {
    498  1.1    cgd 			auio.uio_resid = len = 1000000;	/* Anything Big */
    499  1.1    cgd 			do {
    500  1.1    cgd 			    rcvflg = 0;
    501  1.1    cgd 			    error =  soreceive(so, (struct mbuf **)0,
    502  1.1    cgd 				&auio, mp, (struct mbuf **)0, &rcvflg);
    503  1.1    cgd 			    if (error == EWOULDBLOCK && rep) {
    504  1.1    cgd 				if (rep->r_flags & R_SOFTTERM)
    505  1.1    cgd 					return (EINTR);
    506  1.1    cgd 				if (rep->r_flags & R_MUSTRESEND)
    507  1.1    cgd 					goto tryagain;
    508  1.1    cgd 			    }
    509  1.1    cgd 			} while (error == EWOULDBLOCK);
    510  1.1    cgd 			if (!error && *mp == NULL)
    511  1.1    cgd 				error = EPIPE;
    512  1.1    cgd 			len -= auio.uio_resid;
    513  1.1    cgd 		}
    514  1.1    cgd errout:
    515  1.1    cgd 		if (error && rep && error != EINTR && error != ERESTART) {
    516  1.1    cgd 			m_freem(*mp);
    517  1.1    cgd 			*mp = (struct mbuf *)0;
    518  1.1    cgd 			if (error != EPIPE && rep)
    519  1.1    cgd 				log(LOG_INFO,
    520  1.1    cgd 				    "receive error %d from nfs server %s\n",
    521  1.1    cgd 				    error,
    522  1.1    cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    523  1.1    cgd 			nfs_disconnect(rep->r_nmp);
    524  1.1    cgd 			error = nfs_reconnect(rep, rep->r_nmp);
    525  1.1    cgd 			if (!error)
    526  1.1    cgd 				goto tryagain;
    527  1.1    cgd 		}
    528  1.1    cgd 	} else {
    529  1.1    cgd 		if (so->so_state & SS_ISCONNECTED)
    530  1.1    cgd 			getnam = (struct mbuf **)0;
    531  1.1    cgd 		else
    532  1.1    cgd 			getnam = aname;
    533  1.1    cgd 		auio.uio_resid = len = 1000000;
    534  1.1    cgd 		do {
    535  1.1    cgd 			rcvflg = 0;
    536  1.1    cgd 			error =  soreceive(so, getnam, &auio, mp,
    537  1.1    cgd 				(struct mbuf **)0, &rcvflg);
    538  1.1    cgd 			if (error == EWOULDBLOCK && rep &&
    539  1.1    cgd 			    (rep->r_flags & R_SOFTTERM))
    540  1.1    cgd 				return (EINTR);
    541  1.1    cgd 		} while (error == EWOULDBLOCK);
    542  1.1    cgd 		len -= auio.uio_resid;
    543  1.1    cgd 	}
    544  1.1    cgd 	if (error) {
    545  1.1    cgd 		m_freem(*mp);
    546  1.1    cgd 		*mp = (struct mbuf *)0;
    547  1.1    cgd 	}
    548  1.1    cgd 	/*
    549  1.1    cgd 	 * Search for any mbufs that are not a multiple of 4 bytes long.
    550  1.1    cgd 	 * These could cause pointer alignment problems, so copy them to
    551  1.1    cgd 	 * well aligned mbufs.
    552  1.1    cgd 	 */
    553  1.1    cgd 	m = *mp;
    554  1.1    cgd 	mbp = mp;
    555  1.1    cgd 	while (m) {
    556  1.1    cgd 		/*
    557  1.1    cgd 		 * All this for something that may never happen.
    558  1.1    cgd 		 */
    559  1.1    cgd 		if (m->m_next && (m->m_len & 0x3)) {
    560  1.1    cgd 			printf("nfs_rcv odd length!\n");
    561  1.1    cgd 			mlen = 0;
    562  1.1    cgd 			while (m) {
    563  1.1    cgd 				fcp = mtod(m, caddr_t);
    564  1.1    cgd 				while (m->m_len > 0) {
    565  1.1    cgd 					if (mlen == 0) {
    566  1.1    cgd 						MGET(m2, M_WAIT, MT_DATA);
    567  1.1    cgd 						if (len >= MINCLSIZE)
    568  1.1    cgd 							MCLGET(m2, M_WAIT);
    569  1.1    cgd 						m2->m_len = 0;
    570  1.1    cgd 						mlen = M_TRAILINGSPACE(m2);
    571  1.1    cgd 						tcp = mtod(m2, caddr_t);
    572  1.1    cgd 						*mbp = m2;
    573  1.1    cgd 						mbp = &m2->m_next;
    574  1.1    cgd 					}
    575  1.1    cgd 					siz = MIN(mlen, m->m_len);
    576  1.1    cgd 					bcopy(fcp, tcp, siz);
    577  1.1    cgd 					m2->m_len += siz;
    578  1.1    cgd 					mlen -= siz;
    579  1.1    cgd 					len -= siz;
    580  1.1    cgd 					tcp += siz;
    581  1.1    cgd 					m->m_len -= siz;
    582  1.1    cgd 					fcp += siz;
    583  1.1    cgd 				}
    584  1.1    cgd 				MFREE(m, mnew);
    585  1.1    cgd 				m = mnew;
    586  1.1    cgd 			}
    587  1.1    cgd 			break;
    588  1.1    cgd 		}
    589  1.1    cgd 		len -= m->m_len;
    590  1.1    cgd 		mbp = &m->m_next;
    591  1.1    cgd 		m = m->m_next;
    592  1.1    cgd 	}
    593  1.1    cgd 	return (error);
    594  1.1    cgd }
    595  1.1    cgd 
    596  1.1    cgd /*
    597  1.1    cgd  * Implement receipt of reply on a socket.
    598  1.1    cgd  * We must search through the list of received datagrams matching them
    599  1.1    cgd  * with outstanding requests using the xid, until ours is found.
    600  1.1    cgd  */
    601  1.1    cgd /* ARGSUSED */
    602  1.1    cgd nfs_reply(nmp, myrep)
    603  1.1    cgd 	struct nfsmount *nmp;
    604  1.1    cgd 	struct nfsreq *myrep;
    605  1.1    cgd {
    606  1.1    cgd 	register struct mbuf *m;
    607  1.1    cgd 	register struct nfsreq *rep;
    608  1.1    cgd 	register int error = 0;
    609  1.1    cgd 	u_long rxid;
    610  1.1    cgd 	struct mbuf *mp, *nam;
    611  1.1    cgd 	char *cp;
    612  1.1    cgd 	int cnt, xfer;
    613  1.1    cgd 
    614  1.1    cgd 	/*
    615  1.1    cgd 	 * Loop around until we get our own reply
    616  1.1    cgd 	 */
    617  1.1    cgd 	for (;;) {
    618  1.1    cgd 		/*
    619  1.1    cgd 		 * Lock against other receivers so that I don't get stuck in
    620  1.1    cgd 		 * sbwait() after someone else has received my reply for me.
    621  1.1    cgd 		 * Also necessary for connection based protocols to avoid
    622  1.1    cgd 		 * race conditions during a reconnect.
    623  1.1    cgd 		 */
    624  1.1    cgd 		nfs_solock(&nmp->nm_flag);
    625  1.1    cgd 		/* Already received, bye bye */
    626  1.1    cgd 		if (myrep->r_mrep != NULL) {
    627  1.1    cgd 			nfs_sounlock(&nmp->nm_flag);
    628  1.1    cgd 			return (0);
    629  1.1    cgd 		}
    630  1.1    cgd 		/*
    631  1.1    cgd 		 * Get the next Rpc reply off the socket
    632  1.1    cgd 		 */
    633  1.1    cgd 		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
    634  1.1    cgd 			nfs_sounlock(&nmp->nm_flag);
    635  1.1    cgd 
    636  1.1    cgd 			/*
    637  1.1    cgd 			 * Ignore routing errors on connectionless protocols??
    638  1.1    cgd 			 */
    639  1.1    cgd 			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
    640  1.1    cgd 				nmp->nm_so->so_error = 0;
    641  1.1    cgd 				continue;
    642  1.1    cgd 			}
    643  1.1    cgd 
    644  1.1    cgd 			/*
    645  1.1    cgd 			 * Otherwise cleanup and return a fatal error.
    646  1.1    cgd 			 */
    647  1.1    cgd 			if (myrep->r_flags & R_TIMING) {
    648  1.1    cgd 				myrep->r_flags &= ~R_TIMING;
    649  1.1    cgd 				nmp->nm_rtt = -1;
    650  1.1    cgd 			}
    651  1.1    cgd 			if (myrep->r_flags & R_SENT) {
    652  1.1    cgd 				myrep->r_flags &= ~R_SENT;
    653  1.1    cgd 				nmp->nm_sent--;
    654  1.1    cgd 			}
    655  1.1    cgd 			return (error);
    656  1.1    cgd 		}
    657  1.1    cgd 
    658  1.1    cgd 		/*
    659  1.1    cgd 		 * Get the xid and check that it is an rpc reply
    660  1.1    cgd 		 */
    661  1.1    cgd 		m = mp;
    662  1.1    cgd 		while (m && m->m_len == 0)
    663  1.1    cgd 			m = m->m_next;
    664  1.1    cgd 		if (m == NULL) {
    665  1.1    cgd 			nfsstats.rpcinvalid++;
    666  1.1    cgd 			m_freem(mp);
    667  1.1    cgd 			nfs_sounlock(&nmp->nm_flag);
    668  1.1    cgd 			continue;
    669  1.1    cgd 		}
    670  1.1    cgd 		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
    671  1.1    cgd 		/*
    672  1.1    cgd 		 * Loop through the request list to match up the reply
    673  1.1    cgd 		 * Iff no match, just drop the datagram
    674  1.1    cgd 		 */
    675  1.1    cgd 		m = mp;
    676  1.1    cgd 		rep = nfsreqh.r_next;
    677  1.1    cgd 		while (rep != &nfsreqh) {
    678  1.1    cgd 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
    679  1.1    cgd 				/* Found it.. */
    680  1.1    cgd 				rep->r_mrep = m;
    681  1.1    cgd 				/*
    682  1.1    cgd 				 * Update timing
    683  1.1    cgd 				 */
    684  1.1    cgd 				if (rep->r_flags & R_TIMING) {
    685  1.1    cgd 					nfs_updatetimer(rep->r_nmp);
    686  1.1    cgd 					rep->r_flags &= ~R_TIMING;
    687  1.1    cgd 					rep->r_nmp->nm_rtt = -1;
    688  1.1    cgd 				}
    689  1.1    cgd 				if (rep->r_flags & R_SENT) {
    690  1.1    cgd 					rep->r_flags &= ~R_SENT;
    691  1.1    cgd 					rep->r_nmp->nm_sent--;
    692  1.1    cgd 				}
    693  1.1    cgd 				break;
    694  1.1    cgd 			}
    695  1.1    cgd 			rep = rep->r_next;
    696  1.1    cgd 		}
    697  1.1    cgd 		nfs_sounlock(&nmp->nm_flag);
    698  1.1    cgd 		if (nam)
    699  1.1    cgd 			m_freem(nam);
    700  1.1    cgd 		/*
    701  1.1    cgd 		 * If not matched to a request, drop it.
    702  1.1    cgd 		 * If it's mine, get out.
    703  1.1    cgd 		 */
    704  1.1    cgd 		if (rep == &nfsreqh) {
    705  1.1    cgd 			nfsstats.rpcunexpected++;
    706  1.1    cgd 			m_freem(m);
    707  1.1    cgd 		} else if (rep == myrep)
    708  1.1    cgd 			return (0);
    709  1.1    cgd 	}
    710  1.1    cgd }
    711  1.1    cgd 
    712  1.1    cgd /*
    713  1.1    cgd  * nfs_request - goes something like this
    714  1.1    cgd  *	- fill in request struct
    715  1.1    cgd  *	- links it into list
    716  1.1    cgd  *	- calls nfs_send() for first transmit
    717  1.1    cgd  *	- calls nfs_receive() to get reply
    718  1.1    cgd  *	- break down rpc header and return with nfs reply pointed to
    719  1.1    cgd  *	  by mrep or error
    720  1.1    cgd  * nb: always frees up mreq mbuf list
    721  1.1    cgd  */
    722  1.1    cgd nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
    723  1.1    cgd 	struct vnode *vp;
    724  1.1    cgd 	struct mbuf *mreq;
    725  1.1    cgd 	u_long xid;
    726  1.1    cgd 	int procnum;
    727  1.1    cgd 	struct proc *procp;
    728  1.1    cgd 	int tryhard;
    729  1.1    cgd 	struct mount *mp;
    730  1.1    cgd 	struct mbuf **mrp;
    731  1.1    cgd 	struct mbuf **mdp;
    732  1.1    cgd 	caddr_t *dposp;
    733  1.1    cgd {
    734  1.1    cgd 	register struct mbuf *m, *mrep;
    735  1.1    cgd 	register struct nfsreq *rep;
    736  1.1    cgd 	register u_long *tl;
    737  1.1    cgd 	register int len;
    738  1.1    cgd 	struct nfsmount *nmp;
    739  1.1    cgd 	struct mbuf *md;
    740  1.1    cgd 	struct nfsreq *reph;
    741  1.1    cgd 	caddr_t dpos;
    742  1.1    cgd 	char *cp2;
    743  1.1    cgd 	int t1;
    744  1.1    cgd 	int s, compressed;
    745  1.1    cgd 	int error = 0;
    746  1.1    cgd 
    747  1.1    cgd 	nmp = VFSTONFS(mp);
    748  1.1    cgd 	m = mreq;
    749  1.1    cgd 	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
    750  1.1    cgd 	rep->r_xid = xid;
    751  1.1    cgd 	rep->r_nmp = nmp;
    752  1.1    cgd 	rep->r_vp = vp;
    753  1.1    cgd 	rep->r_procp = procp;
    754  1.1    cgd 	if ((nmp->nm_flag & NFSMNT_SOFT) ||
    755  1.1    cgd 	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
    756  1.1    cgd 		rep->r_retry = nmp->nm_retry;
    757  1.1    cgd 	else
    758  1.1    cgd 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
    759  1.1    cgd 	rep->r_flags = rep->r_rexmit = 0;
    760  1.1    cgd 	/*
    761  1.1    cgd 	 * Three cases:
    762  1.1    cgd 	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
    763  1.1    cgd 	 * - idempotent requests on SOCK_DGRAM use 0
    764  1.1    cgd 	 * - Reliable transports, NFS_RELIABLETIMEO
    765  1.1    cgd 	 *   Timeouts are still done on reliable transports to ensure detection
    766  1.1    cgd 	 *   of excessive connection delay.
    767  1.1    cgd 	 */
    768  1.1    cgd 	if (nmp->nm_sotype != SOCK_DGRAM)
    769  1.1    cgd 		rep->r_timerinit = -NFS_RELIABLETIMEO;
    770  1.1    cgd 	else if (nonidempotent[procnum])
    771  1.1    cgd 		rep->r_timerinit = -NFS_MINIDEMTIMEO;
    772  1.1    cgd 	else
    773  1.1    cgd 		rep->r_timerinit = 0;
    774  1.1    cgd 	rep->r_timer = rep->r_timerinit;
    775  1.1    cgd 	rep->r_mrep = NULL;
    776  1.1    cgd 	len = 0;
    777  1.1    cgd 	while (m) {
    778  1.1    cgd 		len += m->m_len;
    779  1.1    cgd 		m = m->m_next;
    780  1.1    cgd 	}
    781  1.1    cgd 	mreq->m_pkthdr.len = len;
    782  1.1    cgd 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
    783  1.1    cgd 	compressed = 0;
    784  1.1    cgd 	m = mreq;
    785  1.1    cgd 	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
    786  1.1    cgd 		mreq = nfs_compress(mreq);
    787  1.1    cgd 		if (mreq != m) {
    788  1.1    cgd 			len = mreq->m_pkthdr.len;
    789  1.1    cgd 			compressed++;
    790  1.1    cgd 		}
    791  1.1    cgd 	}
    792  1.1    cgd 	/*
    793  1.1    cgd 	 * For non-atomic protocols, insert a Sun RPC Record Mark.
    794  1.1    cgd 	 */
    795  1.1    cgd 	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
    796  1.1    cgd 		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
    797  1.1    cgd 		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
    798  1.1    cgd 	}
    799  1.1    cgd 	rep->r_mreq = mreq;
    800  1.1    cgd 
    801  1.1    cgd 	/*
    802  1.1    cgd 	 * Do the client side RPC.
    803  1.1    cgd 	 */
    804  1.1    cgd 	nfsstats.rpcrequests++;
    805  1.1    cgd 	/*
    806  1.1    cgd 	 * Chain request into list of outstanding requests. Be sure
    807  1.1    cgd 	 * to put it LAST so timer finds oldest requests first.
    808  1.1    cgd 	 */
    809  1.1    cgd 	s = splnet();
    810  1.1    cgd 	reph = &nfsreqh;
    811  1.1    cgd 	reph->r_prev->r_next = rep;
    812  1.1    cgd 	rep->r_prev = reph->r_prev;
    813  1.1    cgd 	reph->r_prev = rep;
    814  1.1    cgd 	rep->r_next = reph;
    815  1.1    cgd 	/*
    816  1.1    cgd 	 * If backing off another request or avoiding congestion, don't
    817  1.1    cgd 	 * send this one now but let timer do it. If not timing a request,
    818  1.1    cgd 	 * do it now.
    819  1.1    cgd 	 */
    820  1.1    cgd 	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
    821  1.1    cgd 	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
    822  1.1    cgd 		nmp->nm_sent++;
    823  1.1    cgd 		rep->r_flags |= R_SENT;
    824  1.1    cgd 		if (nmp->nm_rtt == -1) {
    825  1.1    cgd 			nmp->nm_rtt = 0;
    826  1.1    cgd 			rep->r_flags |= R_TIMING;
    827  1.1    cgd 		}
    828  1.1    cgd 		splx(s);
    829  1.1    cgd 		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
    830  1.1    cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    831  1.1    cgd 			nfs_solock(&nmp->nm_flag);
    832  1.1    cgd 		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
    833  1.1    cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    834  1.1    cgd 			nfs_sounlock(&nmp->nm_flag);
    835  1.1    cgd 		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
    836  1.1    cgd 			nmp->nm_so->so_error = error = 0;
    837  1.1    cgd 	} else
    838  1.1    cgd 		splx(s);
    839  1.1    cgd 
    840  1.1    cgd 	/*
    841  1.1    cgd 	 * Wait for the reply from our send or the timer's.
    842  1.1    cgd 	 */
    843  1.1    cgd 	if (!error)
    844  1.1    cgd 		error = nfs_reply(nmp, rep);
    845  1.1    cgd 
    846  1.1    cgd 	/*
    847  1.1    cgd 	 * RPC done, unlink the request.
    848  1.1    cgd 	 */
    849  1.1    cgd 	s = splnet();
    850  1.1    cgd 	rep->r_prev->r_next = rep->r_next;
    851  1.1    cgd 	rep->r_next->r_prev = rep->r_prev;
    852  1.1    cgd 	splx(s);
    853  1.1    cgd 
    854  1.1    cgd 	/*
    855  1.1    cgd 	 * If there was a successful reply and a tprintf msg.
    856  1.1    cgd 	 * tprintf a response.
    857  1.1    cgd 	 */
    858  1.1    cgd 	if (!error && (rep->r_flags & R_TPRINTFMSG))
    859  1.1    cgd 		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    860  1.1    cgd 		    "is alive again");
    861  1.1    cgd 	m_freem(rep->r_mreq);
    862  1.1    cgd 	mrep = rep->r_mrep;
    863  1.1    cgd 	FREE((caddr_t)rep, M_NFSREQ);
    864  1.1    cgd 	if (error)
    865  1.1    cgd 		return (error);
    866  1.1    cgd 
    867  1.1    cgd 	if (compressed)
    868  1.1    cgd 		mrep = nfs_uncompress(mrep);
    869  1.1    cgd 	md = mrep;
    870  1.1    cgd 	/*
    871  1.1    cgd 	 * break down the rpc header and check if ok
    872  1.1    cgd 	 */
    873  1.1    cgd 	dpos = mtod(md, caddr_t);
    874  1.1    cgd 	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
    875  1.1    cgd 	tl += 2;
    876  1.1    cgd 	if (*tl++ == rpc_msgdenied) {
    877  1.1    cgd 		if (*tl == rpc_mismatch)
    878  1.1    cgd 			error = EOPNOTSUPP;
    879  1.1    cgd 		else
    880  1.1    cgd 			error = EACCES;
    881  1.1    cgd 		m_freem(mrep);
    882  1.1    cgd 		return (error);
    883  1.1    cgd 	}
    884  1.1    cgd 	/*
    885  1.1    cgd 	 * skip over the auth_verf, someday we may want to cache auth_short's
    886  1.1    cgd 	 * for nfs_reqhead(), but for now just dump it
    887  1.1    cgd 	 */
    888  1.1    cgd 	if (*++tl != 0) {
    889  1.1    cgd 		len = nfsm_rndup(fxdr_unsigned(long, *tl));
    890  1.1    cgd 		nfsm_adv(len);
    891  1.1    cgd 	}
    892  1.1    cgd 	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    893  1.1    cgd 	/* 0 == ok */
    894  1.1    cgd 	if (*tl == 0) {
    895  1.1    cgd 		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
    896  1.1    cgd 		if (*tl != 0) {
    897  1.1    cgd 			error = fxdr_unsigned(int, *tl);
    898  1.1    cgd 			m_freem(mrep);
    899  1.1    cgd 			return (error);
    900  1.1    cgd 		}
    901  1.1    cgd 		*mrp = mrep;
    902  1.1    cgd 		*mdp = md;
    903  1.1    cgd 		*dposp = dpos;
    904  1.1    cgd 		return (0);
    905  1.1    cgd 	}
    906  1.1    cgd 	m_freem(mrep);
    907  1.1    cgd 	return (EPROTONOSUPPORT);
    908  1.1    cgd nfsmout:
    909  1.1    cgd 	return (error);
    910  1.1    cgd }
    911  1.1    cgd 
    912  1.1    cgd /*
    913  1.1    cgd  * Get a request for the server main loop
    914  1.1    cgd  * - receive a request via. nfs_soreceive()
    915  1.1    cgd  * - verify it
    916  1.1    cgd  * - fill in the cred struct.
    917  1.1    cgd  */
    918  1.1    cgd nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
    919  1.2    cgd 	msk, mtch, wascomp, repstat)				/* 08 Aug 92*/
    920  1.1    cgd 	struct socket *so;
    921  1.1    cgd 	u_long prog;
    922  1.1    cgd 	u_long vers;
    923  1.1    cgd 	int maxproc;
    924  1.1    cgd 	struct mbuf **nam;
    925  1.1    cgd 	struct mbuf **mrp;
    926  1.1    cgd 	struct mbuf **mdp;
    927  1.1    cgd 	caddr_t *dposp;
    928  1.1    cgd 	u_long *retxid;
    929  1.1    cgd 	u_long *procnum;
    930  1.1    cgd 	register struct ucred *cr;
    931  1.1    cgd 	struct mbuf *msk, *mtch;
    932  1.2    cgd 	int *wascomp, *repstat;					/* 08 Aug 92*/
    933  1.1    cgd {
    934  1.1    cgd 	register int i;
    935  1.1    cgd 	register u_long *tl;
    936  1.1    cgd 	register long t1;
    937  1.1    cgd 	caddr_t dpos, cp2;
    938  1.1    cgd 	int error = 0;
    939  1.1    cgd 	struct mbuf *mrep, *md;
    940  1.1    cgd 	int len;
    941  1.1    cgd 
    942  1.2    cgd 	*repstat = 0;						/* 08 Aug 92*/
    943  1.1    cgd 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    944  1.1    cgd 		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    945  1.1    cgd 	} else {
    946  1.1    cgd 		mrep = (struct mbuf *)0;
    947  1.1    cgd 		do {
    948  1.1    cgd 			if (mrep) {
    949  1.1    cgd 				m_freem(*nam);
    950  1.1    cgd 				m_freem(mrep);
    951  1.1    cgd 			}
    952  1.1    cgd 			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
    953  1.1    cgd 		} while (!error && nfs_badnam(*nam, msk, mtch));
    954  1.1    cgd 	}
    955  1.1    cgd 	if (error)
    956  1.1    cgd 		return (error);
    957  1.1    cgd 	md = mrep;
    958  1.1    cgd 	mrep = nfs_uncompress(mrep);
    959  1.1    cgd 	if (mrep != md) {
    960  1.1    cgd 		*wascomp = 1;
    961  1.1    cgd 		md = mrep;
    962  1.1    cgd 	} else
    963  1.1    cgd 		*wascomp = 0;
    964  1.1    cgd 	dpos = mtod(mrep, caddr_t);
    965  1.1    cgd 	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
    966  1.1    cgd 	*retxid = *tl++;
    967  1.2    cgd 	if (*tl++ != rpc_call || *tl++ != rpc_vers) {		/* 08 Aug 92*/
    968  1.2    cgd 		*mrp = mrep;
    969  1.2    cgd 		*procnum = NFSPROC_NOOP;
    970  1.2    cgd 		*repstat = ERPCMISMATCH;
    971  1.2    cgd 		return (0);
    972  1.1    cgd 	}
    973  1.1    cgd 	if (*tl++ != prog) {
    974  1.2    cgd 		*mrp = mrep;					/* 08 Aug 92*/
    975  1.2    cgd 		*procnum = NFSPROC_NOOP;
    976  1.2    cgd 		*repstat = EPROGUNAVAIL;
    977  1.2    cgd 		return (0);
    978  1.1    cgd 	}
    979  1.1    cgd 	if (*tl++ != vers) {
    980  1.2    cgd 		*mrp = mrep;					/* 08 Aug 92*/
    981  1.2    cgd 		*procnum = NFSPROC_NOOP;
    982  1.2    cgd 		*repstat = EPROGMISMATCH;
    983  1.2    cgd 		return (0);
    984  1.1    cgd 	}
    985  1.1    cgd 	*procnum = fxdr_unsigned(u_long, *tl++);
    986  1.1    cgd 	if (*procnum == NFSPROC_NULL) {
    987  1.1    cgd 		*mrp = mrep;
    988  1.1    cgd 		return (0);
    989  1.1    cgd 	}
    990  1.1    cgd 	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
    991  1.2    cgd 		*mrp = mrep;					/* 08 Aug 92*/
    992  1.2    cgd 		*procnum = NFSPROC_NOOP;
    993  1.2    cgd 		*repstat = EPROCUNAVAIL;
    994  1.2    cgd 		return (0);
    995  1.1    cgd 	}
    996  1.1    cgd 	len = fxdr_unsigned(int, *tl++);
    997  1.1    cgd 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
    998  1.1    cgd 		m_freem(mrep);
    999  1.1    cgd 		return (EBADRPC);
   1000  1.1    cgd 	}
   1001  1.1    cgd 	len = fxdr_unsigned(int, *++tl);
   1002  1.1    cgd 	if (len < 0 || len > NFS_MAXNAMLEN) {
   1003  1.1    cgd 		m_freem(mrep);
   1004  1.1    cgd 		return (EBADRPC);
   1005  1.1    cgd 	}
   1006  1.1    cgd 	nfsm_adv(nfsm_rndup(len));
   1007  1.1    cgd 	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
   1008  1.1    cgd 	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
   1009  1.1    cgd 	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
   1010  1.1    cgd 	len = fxdr_unsigned(int, *tl);
   1011  1.1    cgd 	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
   1012  1.1    cgd 		m_freem(mrep);
   1013  1.1    cgd 		return (EBADRPC);
   1014  1.1    cgd 	}
   1015  1.1    cgd 	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
   1016  1.1    cgd 	for (i = 1; i <= len; i++)
   1017  1.1    cgd 		if (i < NGROUPS)
   1018  1.1    cgd 			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
   1019  1.1    cgd 		else
   1020  1.1    cgd 			tl++;
   1021  1.1    cgd 	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
   1022  1.1    cgd 	/*
   1023  1.1    cgd 	 * Do we have any use for the verifier.
   1024  1.1    cgd 	 * According to the "Remote Procedure Call Protocol Spec." it
   1025  1.1    cgd 	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
   1026  1.1    cgd 	 * For now, just skip over it
   1027  1.1    cgd 	 */
   1028  1.1    cgd 	len = fxdr_unsigned(int, *++tl);
   1029  1.1    cgd 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
   1030  1.1    cgd 		m_freem(mrep);
   1031  1.1    cgd 		return (EBADRPC);
   1032  1.1    cgd 	}
   1033  1.1    cgd 	if (len > 0)
   1034  1.1    cgd 		nfsm_adv(nfsm_rndup(len));
   1035  1.1    cgd 	*mrp = mrep;
   1036  1.1    cgd 	*mdp = md;
   1037  1.1    cgd 	*dposp = dpos;
   1038  1.1    cgd 	return (0);
   1039  1.1    cgd nfsmout:
   1040  1.1    cgd 	return (error);
   1041  1.1    cgd }
   1042  1.1    cgd 
   1043  1.1    cgd /*
   1044  1.1    cgd  * Generate the rpc reply header
   1045  1.1    cgd  * siz arg. is used to decide if adding a cluster is worthwhile
   1046  1.1    cgd  */
   1047  1.1    cgd nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
   1048  1.1    cgd 	int siz;
   1049  1.1    cgd 	u_long retxid;
   1050  1.1    cgd 	int err;
   1051  1.1    cgd 	struct mbuf **mrq;
   1052  1.1    cgd 	struct mbuf **mbp;
   1053  1.1    cgd 	caddr_t *bposp;
   1054  1.1    cgd {
   1055  1.1    cgd 	register u_long *tl;
   1056  1.1    cgd 	register long t1;
   1057  1.1    cgd 	caddr_t bpos;
   1058  1.1    cgd 	struct mbuf *mreq, *mb, *mb2;
   1059  1.1    cgd 
   1060  1.1    cgd 	NFSMGETHDR(mreq);
   1061  1.1    cgd 	mb = mreq;
   1062  1.1    cgd 	if ((siz+RPC_REPLYSIZ) > MHLEN)
   1063  1.1    cgd 		MCLGET(mreq, M_WAIT);
   1064  1.1    cgd 	tl = mtod(mreq, u_long *);
   1065  1.1    cgd 	mreq->m_len = 6*NFSX_UNSIGNED;
   1066  1.1    cgd 	bpos = ((caddr_t)tl)+mreq->m_len;
   1067  1.1    cgd 	*tl++ = retxid;
   1068  1.1    cgd 	*tl++ = rpc_reply;
   1069  1.1    cgd 	if (err == ERPCMISMATCH) {
   1070  1.1    cgd 		*tl++ = rpc_msgdenied;
   1071  1.1    cgd 		*tl++ = rpc_mismatch;
   1072  1.1    cgd 		*tl++ = txdr_unsigned(2);
   1073  1.1    cgd 		*tl = txdr_unsigned(2);
   1074  1.1    cgd 	} else {
   1075  1.1    cgd 		*tl++ = rpc_msgaccepted;
   1076  1.1    cgd 		*tl++ = 0;
   1077  1.1    cgd 		*tl++ = 0;
   1078  1.1    cgd 		switch (err) {
   1079  1.1    cgd 		case EPROGUNAVAIL:
   1080  1.1    cgd 			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
   1081  1.1    cgd 			break;
   1082  1.1    cgd 		case EPROGMISMATCH:
   1083  1.1    cgd 			*tl = txdr_unsigned(RPC_PROGMISMATCH);
   1084  1.1    cgd 			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
   1085  1.1    cgd 			*tl++ = txdr_unsigned(2);
   1086  1.1    cgd 			*tl = txdr_unsigned(2);	/* someday 3 */
   1087  1.1    cgd 			break;
   1088  1.1    cgd 		case EPROCUNAVAIL:
   1089  1.1    cgd 			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
   1090  1.1    cgd 			break;
   1091  1.1    cgd 		default:
   1092  1.1    cgd 			*tl = 0;
   1093  1.1    cgd 			if (err != VNOVAL) {
   1094  1.1    cgd 				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
   1095  1.1    cgd 				*tl = txdr_unsigned(err);
   1096  1.1    cgd 			}
   1097  1.1    cgd 			break;
   1098  1.1    cgd 		};
   1099  1.1    cgd 	}
   1100  1.1    cgd 	*mrq = mreq;
   1101  1.1    cgd 	*mbp = mb;
   1102  1.1    cgd 	*bposp = bpos;
   1103  1.1    cgd 	if (err != 0 && err != VNOVAL)
   1104  1.1    cgd 		nfsstats.srvrpc_errs++;
   1105  1.1    cgd 	return (0);
   1106  1.1    cgd }
   1107  1.1    cgd 
   1108  1.1    cgd /*
   1109  1.1    cgd  * Nfs timer routine
   1110  1.1    cgd  * Scan the nfsreq list and retranmit any requests that have timed out
   1111  1.1    cgd  * To avoid retransmission attempts on STREAM sockets (in the future) make
   1112  1.1    cgd  * sure to set the r_retry field to 0 (implies nm_retry == 0).
   1113  1.1    cgd  */
   1114  1.1    cgd nfs_timer()
   1115  1.1    cgd {
   1116  1.1    cgd 	register struct nfsreq *rep;
   1117  1.1    cgd 	register struct mbuf *m;
   1118  1.1    cgd 	register struct socket *so;
   1119  1.1    cgd 	register struct nfsmount *nmp;
   1120  1.1    cgd 	int s, error;
   1121  1.1    cgd 
   1122  1.1    cgd 	s = splnet();
   1123  1.1    cgd 	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
   1124  1.1    cgd 		nmp = rep->r_nmp;
   1125  1.1    cgd 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
   1126  1.1    cgd 		    (so = nmp->nm_so) == NULL)
   1127  1.1    cgd 			continue;
   1128  1.1    cgd 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
   1129  1.1    cgd 			rep->r_flags |= R_SOFTTERM;
   1130  1.1    cgd 			continue;
   1131  1.1    cgd 		}
   1132  1.1    cgd 		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
   1133  1.1    cgd 			nmp->nm_rtt++;
   1134  1.1    cgd 		/* If not timed out */
   1135  1.1    cgd 		if (++rep->r_timer < nmp->nm_rto)
   1136  1.1    cgd 			continue;
   1137  1.1    cgd 		/* Do backoff and save new timeout in mount */
   1138  1.1    cgd 		if (rep->r_flags & R_TIMING) {
   1139  1.1    cgd 			nfs_backofftimer(nmp);
   1140  1.1    cgd 			rep->r_flags &= ~R_TIMING;
   1141  1.1    cgd 			nmp->nm_rtt = -1;
   1142  1.1    cgd 		}
   1143  1.1    cgd 		if (rep->r_flags & R_SENT) {
   1144  1.1    cgd 			rep->r_flags &= ~R_SENT;
   1145  1.1    cgd 			nmp->nm_sent--;
   1146  1.1    cgd 		}
   1147  1.1    cgd 
   1148  1.1    cgd 		/*
   1149  1.1    cgd 		 * Check for too many retries on soft mount.
   1150  1.1    cgd 		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
   1151  1.1    cgd 		 */
   1152  1.1    cgd 		if (++rep->r_rexmit > NFS_MAXREXMIT)
   1153  1.1    cgd 			rep->r_rexmit = NFS_MAXREXMIT;
   1154  1.1    cgd 
   1155  1.1    cgd 		/*
   1156  1.1    cgd 		 * Check for server not responding
   1157  1.1    cgd 		 */
   1158  1.1    cgd 		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
   1159  1.1    cgd 		     rep->r_rexmit > NFS_FISHY) {
   1160  1.1    cgd 			nfs_msg(rep->r_procp,
   1161  1.1    cgd 			    nmp->nm_mountp->mnt_stat.f_mntfromname,
   1162  1.1    cgd 			    "not responding");
   1163  1.1    cgd 			rep->r_flags |= R_TPRINTFMSG;
   1164  1.1    cgd 		}
   1165  1.1    cgd 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
   1166  1.1    cgd 			nfsstats.rpctimeouts++;
   1167  1.1    cgd 			rep->r_flags |= R_SOFTTERM;
   1168  1.1    cgd 			continue;
   1169  1.1    cgd 		}
   1170  1.1    cgd 		if (nmp->nm_sotype != SOCK_DGRAM)
   1171  1.1    cgd 			continue;
   1172  1.1    cgd 
   1173  1.1    cgd 		/*
   1174  1.1    cgd 		 * If there is enough space and the window allows..
   1175  1.1    cgd 		 *	Resend it
   1176  1.1    cgd 		 */
   1177  1.1    cgd 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
   1178  1.1    cgd 		       nmp->nm_sent < nmp->nm_window &&
   1179  1.1    cgd 		       (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
   1180  1.1    cgd 			nfsstats.rpcretries++;
   1181  1.1    cgd 			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
   1182  1.1    cgd 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1183  1.1    cgd 			    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
   1184  1.1    cgd 			else
   1185  1.1    cgd 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1186  1.1    cgd 			    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
   1187  1.1    cgd 			if (error) {
   1188  1.1    cgd 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
   1189  1.1    cgd 					so->so_error = 0;
   1190  1.1    cgd 			} else {
   1191  1.1    cgd 				/*
   1192  1.1    cgd 				 * We need to time the request even though we
   1193  1.1    cgd 				 * are retransmitting.
   1194  1.1    cgd 				 */
   1195  1.1    cgd 				nmp->nm_rtt = 0;
   1196  1.1    cgd 				nmp->nm_sent++;
   1197  1.1    cgd 				rep->r_flags |= (R_SENT|R_TIMING);
   1198  1.1    cgd 				rep->r_timer = rep->r_timerinit;
   1199  1.1    cgd 			}
   1200  1.1    cgd 		}
   1201  1.1    cgd 	}
   1202  1.1    cgd 	splx(s);
   1203  1.1    cgd 	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
   1204  1.1    cgd }
   1205  1.1    cgd 
   1206  1.1    cgd /*
   1207  1.1    cgd  * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
   1208  1.1    cgd  * used here. The timer state is held in the nfsmount structure and
   1209  1.1    cgd  * a single request is used to clock the response. When successful
   1210  1.1    cgd  * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
   1211  1.1    cgd  * is done by nfs_backofftimer. We also log failure messages in these
   1212  1.1    cgd  * routines.
   1213  1.1    cgd  *
   1214  1.1    cgd  * Congestion variables are held in the nfshost structure which
   1215  1.1    cgd  * is referenced by nfsmounts and shared per-server. This separation
   1216  1.1    cgd  * makes it possible to do per-mount timing which allows varying disk
   1217  1.1    cgd  * access times to be dealt with, while preserving a network oriented
   1218  1.1    cgd  * congestion control scheme.
   1219  1.1    cgd  *
   1220  1.1    cgd  * The windowing implements the Jacobson/Karels slowstart algorithm
   1221  1.1    cgd  * with adjusted scaling factors. We start with one request, then send
   1222  1.1    cgd  * 4 more after each success until the ssthresh limit is reached, then
   1223  1.1    cgd  * we increment at a rate proportional to the window. On failure, we
   1224  1.1    cgd  * remember 3/4 the current window and clamp the send limit to 1. Note
   1225  1.1    cgd  * ICMP source quench is not reflected in so->so_error so we ignore that
   1226  1.1    cgd  * for now.
   1227  1.1    cgd  *
   1228  1.1    cgd  * NFS behaves much more like a transport protocol with these changes,
   1229  1.1    cgd  * shedding the teenage pedal-to-the-metal tendencies of "other"
   1230  1.1    cgd  * implementations.
   1231  1.1    cgd  *
   1232  1.1    cgd  * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
   1233  1.1    cgd  */
   1234  1.1    cgd 
   1235  1.1    cgd /*
   1236  1.1    cgd  * The TCP algorithm was not forgiving enough. Because the NFS server
   1237  1.1    cgd  * responds only after performing lookups/diskio/etc, we have to be
   1238  1.1    cgd  * more prepared to accept a spiky variance. The TCP algorithm is:
   1239  1.1    cgd  * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
   1240  1.1    cgd  */
   1241  1.1    cgd #define NFS_RTO(nmp)	(((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
   1242  1.1    cgd 
   1243  1.1    cgd nfs_updatetimer(nmp)
   1244  1.1    cgd 	register struct nfsmount *nmp;
   1245  1.1    cgd {
   1246  1.1    cgd 
   1247  1.1    cgd 	/* If retransmitted, clear and return */
   1248  1.1    cgd 	if (nmp->nm_rexmit || nmp->nm_currexmit) {
   1249  1.1    cgd 		nmp->nm_rexmit = nmp->nm_currexmit = 0;
   1250  1.1    cgd 		return;
   1251  1.1    cgd 	}
   1252  1.1    cgd 	/* If have a measurement, do smoothing */
   1253  1.1    cgd 	if (nmp->nm_srtt) {
   1254  1.1    cgd 		register short delta;
   1255  1.1    cgd 		delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
   1256  1.1    cgd 		if ((nmp->nm_srtt += delta) <= 0)
   1257  1.1    cgd 			nmp->nm_srtt = 1;
   1258  1.1    cgd 		if (delta < 0)
   1259  1.1    cgd 			delta = -delta;
   1260  1.1    cgd 		delta -= (nmp->nm_rttvar >> 2);
   1261  1.1    cgd 		if ((nmp->nm_rttvar += delta) <= 0)
   1262  1.1    cgd 			nmp->nm_rttvar = 1;
   1263  1.1    cgd 	/* Else initialize */
   1264  1.1    cgd 	} else {
   1265  1.1    cgd 		nmp->nm_rttvar = nmp->nm_rtt << 1;
   1266  1.1    cgd 		if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
   1267  1.1    cgd 		nmp->nm_srtt = nmp->nm_rttvar << 2;
   1268  1.1    cgd 	}
   1269  1.1    cgd 	/* Compute new Retransmission TimeOut and clip */
   1270  1.1    cgd 	nmp->nm_rto = NFS_RTO(nmp);
   1271  1.1    cgd 	if (nmp->nm_rto < NFS_MINTIMEO)
   1272  1.1    cgd 		nmp->nm_rto = NFS_MINTIMEO;
   1273  1.1    cgd 	else if (nmp->nm_rto > NFS_MAXTIMEO)
   1274  1.1    cgd 		nmp->nm_rto = NFS_MAXTIMEO;
   1275  1.1    cgd 
   1276  1.1    cgd 	/* Update window estimate */
   1277  1.1    cgd 	if (nmp->nm_window < nmp->nm_ssthresh)	/* quickly */
   1278  1.1    cgd 		nmp->nm_window += 4;
   1279  1.1    cgd 	else {						/* slowly */
   1280  1.1    cgd 		register long incr = ++nmp->nm_winext;
   1281  1.1    cgd 		incr = (incr * incr) / nmp->nm_window;
   1282  1.1    cgd 		if (incr > 0) {
   1283  1.1    cgd 			nmp->nm_winext = 0;
   1284  1.1    cgd 			++nmp->nm_window;
   1285  1.1    cgd 		}
   1286  1.1    cgd 	}
   1287  1.1    cgd 	if (nmp->nm_window > NFS_MAXWINDOW)
   1288  1.1    cgd 		nmp->nm_window = NFS_MAXWINDOW;
   1289  1.1    cgd }
   1290  1.1    cgd 
   1291  1.1    cgd nfs_backofftimer(nmp)
   1292  1.1    cgd 	register struct nfsmount *nmp;
   1293  1.1    cgd {
   1294  1.1    cgd 	register unsigned long newrto;
   1295  1.1    cgd 
   1296  1.1    cgd 	/* Clip shift count */
   1297  1.1    cgd 	if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
   1298  1.1    cgd 		nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
   1299  1.1    cgd 	/* Back off RTO exponentially */
   1300  1.1    cgd 	newrto = NFS_RTO(nmp);
   1301  1.1    cgd 	newrto <<= (nmp->nm_rexmit - 1);
   1302  1.1    cgd 	if (newrto == 0 || newrto > NFS_MAXTIMEO)
   1303  1.1    cgd 		newrto = NFS_MAXTIMEO;
   1304  1.1    cgd 	nmp->nm_rto = newrto;
   1305  1.1    cgd 
   1306  1.1    cgd 	/* If too many retries, message, assume a bogus RTT and re-measure */
   1307  1.1    cgd 	if (nmp->nm_currexmit < nmp->nm_rexmit) {
   1308  1.1    cgd 		nmp->nm_currexmit = nmp->nm_rexmit;
   1309  1.1    cgd 		if (nmp->nm_currexmit >= nfsrexmtthresh) {
   1310  1.1    cgd 			if (nmp->nm_currexmit == nfsrexmtthresh) {
   1311  1.1    cgd 				nmp->nm_rttvar += (nmp->nm_srtt >> 2);
   1312  1.1    cgd 				nmp->nm_srtt = 0;
   1313  1.1    cgd 			}
   1314  1.1    cgd 		}
   1315  1.1    cgd 	}
   1316  1.1    cgd 	/* Close down window but remember this point (3/4 current) for later */
   1317  1.1    cgd 	nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
   1318  1.1    cgd 	nmp->nm_window = 1;
   1319  1.1    cgd 	nmp->nm_winext = 0;
   1320  1.1    cgd }
   1321  1.1    cgd 
   1322  1.1    cgd /*
   1323  1.1    cgd  * Test for a termination signal pending on procp.
   1324  1.1    cgd  * This is used for NFSMNT_INT mounts.
   1325  1.1    cgd  */
   1326  1.1    cgd nfs_sigintr(p)
   1327  1.1    cgd 	register struct proc *p;
   1328  1.1    cgd {
   1329  1.1    cgd 	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
   1330  1.1    cgd 	    NFSINT_SIGMASK))
   1331  1.1    cgd 		return (1);
   1332  1.1    cgd 	else
   1333  1.1    cgd 		return (0);
   1334  1.1    cgd }
   1335  1.1    cgd 
   1336  1.1    cgd nfs_msg(p, server, msg)
   1337  1.1    cgd 	struct proc *p;
   1338  1.1    cgd 	char *server, *msg;
   1339  1.1    cgd {
   1340  1.1    cgd 	tpr_t tpr;
   1341  1.1    cgd 
   1342  1.1    cgd 	if (p)
   1343  1.1    cgd 		tpr = tprintf_open(p);
   1344  1.1    cgd 	else
   1345  1.1    cgd 		tpr = NULL;
   1346  1.1    cgd 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1347  1.1    cgd 	tprintf_close(tpr);
   1348  1.1    cgd }
   1349  1.1    cgd 
   1350  1.1    cgd /*
   1351  1.1    cgd  * Lock a socket against others.
   1352  1.1    cgd  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1353  1.1    cgd  * and also to avoid race conditions between the processes with nfs requests
   1354  1.1    cgd  * in progress when a reconnect is necessary.
   1355  1.1    cgd  */
   1356  1.1    cgd nfs_solock(flagp)
   1357  1.1    cgd 	register int *flagp;
   1358  1.1    cgd {
   1359  1.1    cgd 
   1360  1.1    cgd 	while (*flagp & NFSMNT_SCKLOCK) {
   1361  1.1    cgd 		*flagp |= NFSMNT_WANTSCK;
   1362  1.1    cgd 		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
   1363  1.1    cgd 	}
   1364  1.1    cgd 	*flagp |= NFSMNT_SCKLOCK;
   1365  1.1    cgd }
   1366  1.1    cgd 
   1367  1.1    cgd /*
   1368  1.1    cgd  * Unlock the stream socket for others.
   1369  1.1    cgd  */
   1370  1.1    cgd nfs_sounlock(flagp)
   1371  1.1    cgd 	register int *flagp;
   1372  1.1    cgd {
   1373  1.1    cgd 
   1374  1.1    cgd 	if ((*flagp & NFSMNT_SCKLOCK) == 0)
   1375  1.1    cgd 		panic("nfs sounlock");
   1376  1.1    cgd 	*flagp &= ~NFSMNT_SCKLOCK;
   1377  1.1    cgd 	if (*flagp & NFSMNT_WANTSCK) {
   1378  1.1    cgd 		*flagp &= ~NFSMNT_WANTSCK;
   1379  1.1    cgd 		wakeup((caddr_t)flagp);
   1380  1.1    cgd 	}
   1381  1.1    cgd }
   1382  1.1    cgd 
   1383  1.1    cgd /*
   1384  1.1    cgd  * This function compares two net addresses by family and returns TRUE
   1385  1.1    cgd  * if they are the same.
   1386  1.1    cgd  * If there is any doubt, return FALSE.
   1387  1.1    cgd  */
   1388  1.1    cgd nfs_netaddr_match(nam1, nam2)
   1389  1.1    cgd 	struct mbuf *nam1, *nam2;
   1390  1.1    cgd {
   1391  1.1    cgd 	register struct sockaddr *saddr1, *saddr2;
   1392  1.1    cgd 
   1393  1.1    cgd 	saddr1 = mtod(nam1, struct sockaddr *);
   1394  1.1    cgd 	saddr2 = mtod(nam2, struct sockaddr *);
   1395  1.1    cgd 	if (saddr1->sa_family != saddr2->sa_family)
   1396  1.1    cgd 		return (0);
   1397  1.1    cgd 
   1398  1.1    cgd 	/*
   1399  1.1    cgd 	 * Must do each address family separately since unused fields
   1400  1.1    cgd 	 * are undefined values and not always zeroed.
   1401  1.1    cgd 	 */
   1402  1.1    cgd 	switch (saddr1->sa_family) {
   1403  1.1    cgd 	case AF_INET:
   1404  1.1    cgd 		if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
   1405  1.1    cgd 		    ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
   1406  1.1    cgd 			return (1);
   1407  1.1    cgd 		break;
   1408  1.1    cgd 	default:
   1409  1.1    cgd 		break;
   1410  1.1    cgd 	};
   1411  1.1    cgd 	return (0);
   1412  1.1    cgd }
   1413  1.1    cgd 
   1414  1.1    cgd /*
   1415  1.1    cgd  * Check the hostname fields for nfsd's mask and match fields.
   1416  1.1    cgd  * By address family:
   1417  1.1    cgd  * - Bitwise AND the mask with the host address field
   1418  1.1    cgd  * - Compare for == with match
   1419  1.1    cgd  * return TRUE if not equal
   1420  1.1    cgd  */
   1421  1.1    cgd nfs_badnam(nam, msk, mtch)
   1422  1.1    cgd 	register struct mbuf *nam, *msk, *mtch;
   1423  1.1    cgd {
   1424  1.1    cgd 	switch (mtod(nam, struct sockaddr *)->sa_family) {
   1425  1.1    cgd 	case AF_INET:
   1426  1.1    cgd 		return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
   1427  1.1    cgd 			 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
   1428  1.1    cgd 			 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
   1429  1.1    cgd 	default:
   1430  1.1    cgd 		printf("nfs_badmatch, unknown sa_family\n");
   1431  1.1    cgd 		return (0);
   1432  1.1    cgd 	};
   1433  1.1    cgd }
   1434