Home | History | Annotate | Line # | Download | only in nfs
nfs_socket.c revision 1.11
      1   1.1      cgd /*
      2   1.1      cgd  * Copyright (c) 1989, 1991 The Regents of the University of California.
      3   1.1      cgd  * All rights reserved.
      4   1.1      cgd  *
      5   1.1      cgd  * This code is derived from software contributed to Berkeley by
      6   1.1      cgd  * Rick Macklem at The University of Guelph.
      7   1.1      cgd  *
      8   1.1      cgd  * Redistribution and use in source and binary forms, with or without
      9   1.1      cgd  * modification, are permitted provided that the following conditions
     10   1.1      cgd  * are met:
     11   1.1      cgd  * 1. Redistributions of source code must retain the above copyright
     12   1.1      cgd  *    notice, this list of conditions and the following disclaimer.
     13   1.1      cgd  * 2. Redistributions in binary form must reproduce the above copyright
     14   1.1      cgd  *    notice, this list of conditions and the following disclaimer in the
     15   1.1      cgd  *    documentation and/or other materials provided with the distribution.
     16   1.1      cgd  * 3. All advertising materials mentioning features or use of this software
     17   1.1      cgd  *    must display the following acknowledgement:
     18   1.1      cgd  *	This product includes software developed by the University of
     19   1.1      cgd  *	California, Berkeley and its contributors.
     20   1.1      cgd  * 4. Neither the name of the University nor the names of its contributors
     21   1.1      cgd  *    may be used to endorse or promote products derived from this software
     22   1.1      cgd  *    without specific prior written permission.
     23   1.1      cgd  *
     24   1.1      cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25   1.1      cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26   1.1      cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27   1.1      cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28   1.1      cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29   1.1      cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30   1.1      cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31   1.1      cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32   1.1      cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33   1.1      cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34   1.1      cgd  * SUCH DAMAGE.
     35   1.1      cgd  *
     36   1.4      cgd  *	from: @(#)nfs_socket.c	7.23 (Berkeley) 4/20/91
     37  1.11      cgd  *	$Id: nfs_socket.c,v 1.11 1994/04/10 06:45:59 cgd Exp $
     38   1.1      cgd  */
     39   1.1      cgd 
     40   1.1      cgd /*
     41   1.1      cgd  * Socket operations for use by nfs
     42   1.1      cgd  */
     43   1.1      cgd 
     44   1.9  mycroft #include <sys/param.h>
     45   1.9  mycroft #include <sys/systm.h>
     46   1.9  mycroft #include <sys/proc.h>
     47   1.9  mycroft #include <sys/mount.h>
     48   1.9  mycroft #include <sys/kernel.h>
     49   1.9  mycroft #include <sys/malloc.h>
     50   1.9  mycroft #include <sys/mbuf.h>
     51   1.9  mycroft #include <sys/namei.h>
     52   1.9  mycroft #include <sys/vnode.h>
     53   1.9  mycroft #include <sys/domain.h>
     54   1.9  mycroft #include <sys/protosw.h>
     55   1.9  mycroft #include <sys/socket.h>
     56   1.9  mycroft #include <sys/socketvar.h>
     57   1.9  mycroft #include <sys/syslog.h>
     58   1.9  mycroft #include <sys/tprintf.h>
     59   1.1      cgd 
     60   1.9  mycroft #include <netinet/in.h>
     61   1.9  mycroft #include <netinet/tcp.h>
     62   1.9  mycroft 
     63   1.9  mycroft #include <nfs/rpcv2.h>
     64   1.9  mycroft #include <nfs/nfsv2.h>
     65   1.9  mycroft #include <nfs/nfs.h>
     66   1.9  mycroft #include <nfs/xdr_subs.h>
     67   1.9  mycroft #include <nfs/nfsm_subs.h>
     68   1.9  mycroft #include <nfs/nfsmount.h>
     69   1.1      cgd 
     70   1.1      cgd #define	TRUE	1
     71   1.1      cgd #define	FALSE	0
                       /* TRUE/FALSE are the values stored in the per-procedure tables below. */
     72   1.1      cgd 
     73   1.1      cgd /*
     74   1.1      cgd  * External data, mostly RPC constants in XDR form
     75   1.1      cgd  */
     76   1.1      cgd extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
     77   1.1      cgd 	rpc_msgaccepted, rpc_call;
     78   1.1      cgd extern u_long nfs_prog, nfs_vers;
     79   1.1      cgd /* Maybe these should be bits in a u_long ?? */
     80   1.3    glass /*
     81   1.3    glass  * Static array that defines which nfs rpc's are nonidempotent
                        *
                        * One int per procedure, NFS_NPROCS entries in all; TRUE marks the
                        * procedure at that index as nonidempotent.  Despite the wording above
                        * the array is not declared "static": the symbol has external linkage.
                        *
                        * NOTE(review): the index is presumably the NFS version 2 procedure
                        * number.  If so, the TRUE entries are SETATTR (2) and WRITE through
                        * RMDIR (8-15) -- confirm against nfsv2.h.
     82   1.3    glass  */
     83   1.3    glass int nonidempotent[NFS_NPROCS] = {
     84   1.3    glass 	FALSE,	/*  0 */
     85   1.3    glass 	FALSE,	/*  1 */
     86   1.3    glass 	TRUE,	/*  2 */
     87   1.3    glass 	FALSE,	/*  3 */
     88   1.3    glass 	FALSE,	/*  4 */
     89   1.3    glass 	FALSE,	/*  5 */
     90   1.3    glass 	FALSE,	/*  6 */
     91   1.3    glass 	FALSE,	/*  7 */
     92   1.3    glass 	TRUE,	/*  8 */
     93   1.3    glass 	TRUE,	/*  9 */
     94   1.3    glass 	TRUE,	/* 10 */
     95   1.3    glass 	TRUE,	/* 11 */
     96   1.3    glass 	TRUE,	/* 12 */
     97   1.3    glass 	TRUE,	/* 13 */
     98   1.3    glass 	TRUE,	/* 14 */
     99   1.3    glass 	TRUE,	/* 15 */
    100   1.3    glass 	FALSE,	/* 16 */
    101   1.3    glass 	FALSE,	/* 17 */
    102   1.3    glass };
    103   1.1      cgd static int compressrequest[NFS_NPROCS] = {
                       	/*
                       	 * File-local table with one TRUE/FALSE entry per procedure
                       	 * (NFS_NPROCS entries).
                       	 * NOTE(review): judging by the name and by the nfs_compress()
                       	 * declaration further down, TRUE presumably marks procedures
                       	 * whose requests may be compressed -- confirm at the use site.
                       	 */
    104   1.1      cgd 	FALSE,	/*  0 */
    105   1.1      cgd 	TRUE,	/*  1 */
    106   1.1      cgd 	TRUE,	/*  2 */
    107   1.1      cgd 	FALSE,	/*  3 */
    108   1.1      cgd 	TRUE,	/*  4 */
    109   1.1      cgd 	TRUE,	/*  5 */
    110   1.1      cgd 	TRUE,	/*  6 */
    111   1.1      cgd 	FALSE,	/*  7 */
    112   1.1      cgd 	FALSE,	/*  8 */
    113   1.1      cgd 	TRUE,	/*  9 */
    114   1.1      cgd 	TRUE,	/* 10 */
    115   1.1      cgd 	TRUE,	/* 11 */
    116   1.1      cgd 	TRUE,	/* 12 */
    117   1.1      cgd 	TRUE,	/* 13 */
    118   1.1      cgd 	TRUE,	/* 14 */
    119   1.1      cgd 	TRUE,	/* 15 */
    120   1.1      cgd 	TRUE,	/* 16 */
    121   1.1      cgd 	TRUE,	/* 17 */
    122   1.1      cgd };
    123   1.1      cgd int	nfs_sbwait();
    124   1.1      cgd void	nfs_disconnect();
    125   1.1      cgd struct mbuf *nfs_compress(), *nfs_uncompress();
                       /* The three lines above are old-style (unprototyped) forward declarations. */
    126   1.1      cgd 
    127   1.1      cgd 
                       /*
                        * Head of the list of outstanding requests.  The list is circular:
                        * it is walked from nfsreqh.r_next until &nfsreqh is reached again
                        * (see nfs_reconnect()).
                        */
    128   1.1      cgd struct nfsreq nfsreqh;
                       /* Initialised to NFS_FISHY. */
    129   1.1      cgd int nfsrexmtthresh = NFS_FISHY;
                       /* When non-zero, nfs_connect() sets TCP_NODELAY on AF_INET/TCP sockets. */
    130   1.1      cgd int nfs_tcpnodelay = 1;
    131   1.1      cgd 
    132   1.1      cgd /*
    133   1.1      cgd  * Initialize sockets and congestion for a new NFS connection.
    134   1.1      cgd  * We do not free the sockaddr if error.
                        *
                        * Creates a socket of the mount's type and protocol in the address
                        * family of nmp->nm_nam and stores it in nmp->nm_so.  For AF_INET it
                        * tries to bind a reserved local port.  It then sizes the socket
                        * buffers, connects unless NFSMNT_NOCONN is set, applies timeouts and
                        * socket options, and resets the mount's retransmit/congestion fields.
                        *
                        * Returns 0 on success.  On failure it calls nfs_disconnect(), which
                        * closes any socket recorded in nmp->nm_so and nulls that field, and
                        * returns the error from the failing step.  nmp->nm_nam is never
                        * freed here.
    135   1.1      cgd  */
    136   1.1      cgd nfs_connect(nmp)
    137   1.1      cgd 	register struct nfsmount *nmp;
    138   1.1      cgd {
    139   1.1      cgd 	register struct socket *so;
    140  1.11      cgd 	struct sockaddr *saddr;
    141   1.1      cgd 	int s, error, bufsize;
    142   1.1      cgd 	struct mbuf *m;
    143  1.11      cgd 	struct sockaddr_in *sin;
    144  1.11      cgd 	u_short tport;
    145   1.1      cgd 
                       	/* Clear first: nfs_disconnect() at "bad" only acts on a non-null nm_so. */
    146   1.1      cgd 	nmp->nm_so = (struct socket *)0;
    147  1.11      cgd 	saddr = mtod(nmp->nm_nam, struct sockaddr *);
    148  1.11      cgd 	if (error = socreate(saddr->sa_family,
    149   1.1      cgd 		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
    150   1.1      cgd 		goto bad;
    151   1.1      cgd 	so = nmp->nm_so;
                       	/* Cache the protocol flags; nfs_send() and nfs_receive() read them from the mount. */
    152   1.1      cgd 	nmp->nm_soflags = so->so_proto->pr_flags;
    153   1.1      cgd 
    154   1.2      cgd 	/*
    155   1.2      cgd 	 * Some servers require that the client port be a reserved port number.
                       	 *
                       	 * Start at IPPORT_RESERVED - 1 and step downwards for as long as
                       	 * sobind() reports EADDRINUSE; stop trying once tport reaches
                       	 * IPPORT_RESERVED / 2.
                       	 * NOTE(review): the final sobind() result is not checked, so any
                       	 * bind failure is silent and the function carries on regardless.
    156   1.2      cgd 	 */
    157   1.2      cgd 	if (saddr->sa_family == AF_INET) {
    158   1.2      cgd 		MGET(m, M_WAIT, MT_SONAME);
    159   1.2      cgd 		sin = mtod(m, struct sockaddr_in *);
    160   1.2      cgd 		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
    161   1.2      cgd 		sin->sin_family = AF_INET;
    162   1.2      cgd 		sin->sin_addr.s_addr = INADDR_ANY;
    163   1.2      cgd 		tport = IPPORT_RESERVED - 1;
    164   1.2      cgd 		sin->sin_port = htons(tport);
    165   1.2      cgd 		while (sobind(so, m) == EADDRINUSE &&
    166   1.2      cgd 		       --tport > IPPORT_RESERVED / 2)
    167   1.2      cgd 			sin->sin_port = htons(tport);
    168   1.2      cgd 		m_freem(m);
    169   1.2      cgd 	}
    170   1.2      cgd 
                       	/*
                       	 * Reserve four times (write size + packet header), capped at the
                       	 * maximum packet size.  The non-datagram case adds sizeof(u_long)
                       	 * to both terms, the size of the length word that nfs_receive()
                       	 * reads ahead of each record on stream sockets.
                       	 */
    171   1.1      cgd 	if (nmp->nm_sotype == SOCK_DGRAM)
    172   1.1      cgd 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
    173   1.1      cgd 		    NFS_MAXPACKET);
    174   1.1      cgd 	else
    175   1.1      cgd 		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
    176   1.1      cgd 		    NFS_MAXPACKET + sizeof(u_long));
    177   1.1      cgd 	if (error = soreserve(so, bufsize, bufsize))
    178   1.1      cgd 		goto bad;
    179   1.1      cgd 
    180   1.1      cgd 	/*
    181   1.1      cgd 	 * Protocols that do not require connections may be optionally left
    182   1.1      cgd 	 * unconnected for servers that reply from a port other than NFS_PORT.
    183   1.1      cgd 	 */
    184   1.1      cgd 	if (nmp->nm_flag & NFSMNT_NOCONN) {
                       		/* "No connect" is refused on a protocol that requires a connection. */
    185   1.1      cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
    186   1.1      cgd 			error = ENOTCONN;
    187   1.1      cgd 			goto bad;
    188   1.1      cgd 		}
    189   1.1      cgd 	} else {
    190   1.1      cgd 		if (error = soconnect(so, nmp->nm_nam))
    191   1.1      cgd 			goto bad;
    192   1.1      cgd 
    193   1.1      cgd 		/*
    194   1.1      cgd 		 * Wait for the connection to complete. Cribbed from the
    195   1.1      cgd 		 * connect system call but with the wait at negative prio.
    196   1.1      cgd 		 */
    197   1.1      cgd 		s = splnet();
    198   1.1      cgd 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
    199   1.1      cgd 			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
    200   1.1      cgd 		splx(s);
    201   1.1      cgd 		if (so->so_error) {
    202   1.1      cgd 			error = so->so_error;
    203   1.1      cgd 			goto bad;
    204   1.1      cgd 		}
    205   1.1      cgd 	}
                       	/*
                       	 * Buffer timeouts: 5 seconds' worth of ticks (5 * hz) for soft,
                       	 * spongy or interruptible mounts, otherwise 0.
                       	 * NOTE(review): both arms of the SOCK_DGRAM test below set the same
                       	 * sb_timeo values; only nm_rto and the socket options differ.
                       	 */
    206   1.1      cgd 	if (nmp->nm_sotype == SOCK_DGRAM) {
    207   1.1      cgd 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    208   1.1      cgd 			so->so_rcv.sb_timeo = (5 * hz);
    209   1.1      cgd 			so->so_snd.sb_timeo = (5 * hz);
    210   1.1      cgd 		} else {
    211   1.1      cgd 			so->so_rcv.sb_timeo = 0;
    212   1.1      cgd 			so->so_snd.sb_timeo = 0;
    213   1.1      cgd 		}
    214   1.1      cgd 		nmp->nm_rto = NFS_TIMEO;
    215   1.1      cgd 	} else {
    216   1.1      cgd 		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
    217   1.1      cgd 			so->so_rcv.sb_timeo = (5 * hz);
    218   1.1      cgd 			so->so_snd.sb_timeo = (5 * hz);
    219   1.1      cgd 		} else {
    220   1.1      cgd 			so->so_rcv.sb_timeo = 0;
    221   1.1      cgd 			so->so_snd.sb_timeo = 0;
    222   1.1      cgd 		}
                       		/*
                       		 * Turn on SO_KEEPALIVE for connection-oriented protocols.
                       		 * NOTE(review): the sosetopt() results here and below are
                       		 * ignored, and m is not freed locally -- presumably
                       		 * sosetopt() takes ownership of it; confirm.
                       		 */
    223   1.1      cgd 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    224   1.1      cgd 			MGET(m, M_WAIT, MT_SOOPTS);
    225   1.1      cgd 			*mtod(m, int *) = 1;
    226   1.1      cgd 			m->m_len = sizeof(int);
    227   1.1      cgd 			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
    228   1.1      cgd 		}
                       		/* Set TCP_NODELAY on TCP sockets when nfs_tcpnodelay is non-zero. */
    229   1.1      cgd 		if (so->so_proto->pr_domain->dom_family == AF_INET &&
    230   1.1      cgd 		    so->so_proto->pr_protocol == IPPROTO_TCP &&
    231   1.1      cgd 		    nfs_tcpnodelay) {
    232   1.1      cgd 			MGET(m, M_WAIT, MT_SOOPTS);
    233   1.1      cgd 			*mtod(m, int *) = 1;
    234   1.1      cgd 			m->m_len = sizeof(int);
    235   1.1      cgd 			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
    236   1.1      cgd 		}
                       		/* Non-datagram sockets start with ten times the datagram timeout. */
    237   1.1      cgd 		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
    238   1.1      cgd 	}
    239   1.1      cgd 	so->so_rcv.sb_flags |= SB_NOINTR;
    240   1.1      cgd 	so->so_snd.sb_flags |= SB_NOINTR;
    241   1.1      cgd 
    242   1.1      cgd 	/* Initialize other non-zero congestion variables */
    243   1.1      cgd 	nmp->nm_window = 2;			/* Initial send window */
    244   1.1      cgd 	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
    245   1.1      cgd 	nmp->nm_rttvar = nmp->nm_rto << 1;
    246   1.1      cgd 	nmp->nm_sent = 0;
    247   1.1      cgd 	nmp->nm_currexmit = 0;
    248   1.1      cgd 	return (0);
    249   1.1      cgd 
                       	/* Common failure exit: close whatever socket exists and report error. */
    250   1.1      cgd bad:
    251   1.1      cgd 	nfs_disconnect(nmp);
    252   1.1      cgd 	return (error);
    253   1.1      cgd }
    254   1.1      cgd 
    255   1.1      cgd /*
    256   1.1      cgd  * Reconnect routine:
    257   1.1      cgd  * Called when a connection is broken on a reliable protocol.
    258   1.1      cgd  * - clean up the old socket
    259   1.1      cgd  * - nfs_connect() again
    260   1.1      cgd  * - set R_MUSTRESEND for all outstanding requests on mount point
    261   1.1      cgd  * If this fails the mount point is DEAD!
    262   1.1      cgd  * nb: Must be called with the nfs_solock() set on the mount point.
    263   1.1      cgd  */
    264   1.1      cgd nfs_reconnect(rep, nmp)
    265   1.1      cgd 	register struct nfsreq *rep;
    266   1.1      cgd 	register struct nfsmount *nmp;
    267   1.1      cgd {
    268   1.1      cgd 	register struct nfsreq *rp;
    269   1.1      cgd 	int error;
    270   1.1      cgd 
    271   1.1      cgd 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    272   1.1      cgd 	    "trying reconnect");
    273   1.1      cgd 	while (error = nfs_connect(nmp)) {
    274   1.1      cgd #ifdef lint
    275   1.1      cgd 		error = error;
    276   1.1      cgd #endif /* lint */
    277   1.1      cgd 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
    278   1.1      cgd 			return (EINTR);
    279   1.1      cgd 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
    280   1.1      cgd 	}
    281   1.1      cgd 	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    282   1.1      cgd 	    "reconnected");
    283   1.1      cgd 
    284   1.1      cgd 	/*
    285   1.1      cgd 	 * Loop through outstanding request list and fix up all requests
    286   1.1      cgd 	 * on old socket.
    287   1.1      cgd 	 */
    288   1.1      cgd 	rp = nfsreqh.r_next;
    289   1.1      cgd 	while (rp != &nfsreqh) {
    290   1.1      cgd 		if (rp->r_nmp == nmp)
    291   1.1      cgd 			rp->r_flags |= R_MUSTRESEND;
    292   1.1      cgd 		rp = rp->r_next;
    293   1.1      cgd 	}
    294   1.1      cgd 	return (0);
    295   1.1      cgd }
    296   1.1      cgd 
    297   1.1      cgd /*
    298   1.1      cgd  * NFS disconnect. Clean up and unlink.
    299   1.1      cgd  */
    300   1.1      cgd void
    301   1.1      cgd nfs_disconnect(nmp)
    302   1.1      cgd 	register struct nfsmount *nmp;
    303   1.1      cgd {
    304   1.1      cgd 	register struct socket *so;
    305   1.1      cgd 
    306   1.1      cgd 	if (nmp->nm_so) {
    307   1.1      cgd 		so = nmp->nm_so;
    308   1.1      cgd 		nmp->nm_so = (struct socket *)0;
    309   1.1      cgd 		soshutdown(so, 2);
    310   1.1      cgd 		soclose(so);
    311   1.1      cgd 	}
    312   1.1      cgd }
    313   1.1      cgd 
    314   1.1      cgd /*
    315   1.1      cgd  * This is the nfs send routine. For connection based socket types, it
    316   1.1      cgd  * must be called with an nfs_solock() on the socket.
    317   1.1      cgd  * "rep == NULL" indicates that it has been called from a server.
    318   1.1      cgd  */
    319   1.1      cgd nfs_send(so, nam, top, rep)
    320   1.1      cgd 	register struct socket *so;
    321   1.1      cgd 	struct mbuf *nam;
    322   1.1      cgd 	register struct mbuf *top;
    323   1.1      cgd 	struct nfsreq *rep;
    324   1.1      cgd {
    325   1.1      cgd 	struct mbuf *sendnam;
    326   1.1      cgd 	int error, soflags;
    327   1.1      cgd 
    328   1.1      cgd 	if (rep) {
    329   1.1      cgd 		if (rep->r_flags & R_SOFTTERM) {
    330   1.1      cgd 			m_freem(top);
    331   1.1      cgd 			return (EINTR);
    332   1.1      cgd 		}
    333   1.1      cgd 		if (rep->r_nmp->nm_so == NULL &&
    334   1.1      cgd 		    (error = nfs_reconnect(rep, rep->r_nmp)))
    335   1.1      cgd 			return (error);
    336   1.1      cgd 		rep->r_flags &= ~R_MUSTRESEND;
    337   1.1      cgd 		so = rep->r_nmp->nm_so;
    338   1.1      cgd 		soflags = rep->r_nmp->nm_soflags;
    339   1.1      cgd 	} else
    340   1.1      cgd 		soflags = so->so_proto->pr_flags;
    341   1.1      cgd 	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
    342   1.1      cgd 		sendnam = (struct mbuf *)0;
    343   1.1      cgd 	else
    344   1.1      cgd 		sendnam = nam;
    345   1.1      cgd 
    346   1.1      cgd 	error = sosend(so, sendnam, (struct uio *)0, top,
    347   1.1      cgd 		(struct mbuf *)0, 0);
    348   1.1      cgd 	if (error == EWOULDBLOCK && rep) {
    349   1.1      cgd 		if (rep->r_flags & R_SOFTTERM)
    350   1.1      cgd 			error = EINTR;
    351   1.1      cgd 		else {
    352   1.1      cgd 			rep->r_flags |= R_MUSTRESEND;
    353   1.1      cgd 			error = 0;
    354   1.1      cgd 		}
    355   1.1      cgd 	}
    356   1.1      cgd 	/*
    357   1.1      cgd 	 * Ignore socket errors??
    358   1.1      cgd 	 */
    359   1.1      cgd 	if (error && error != EINTR && error != ERESTART)
    360   1.1      cgd 		error = 0;
    361   1.1      cgd 	return (error);
    362   1.1      cgd }
    363   1.1      cgd 
    364   1.1      cgd /*
    365   1.1      cgd  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    366   1.1      cgd  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    367   1.1      cgd  * Mark and consolidate the data into a new mbuf list.
    368   1.1      cgd  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    369   1.1      cgd  *     small mbufs.
    370   1.1      cgd  * For SOCK_STREAM we must be very careful to read an entire record once
    371   1.1      cgd  * we have read any of it, even if the system call has been interrupted.
                        *
                        * so     socket to read from; when rep is non-null and the protocol
                        *        requires a connection, the mount's current socket is
                        *        re-read from rep->r_nmp->nm_so on every attempt.
                        * aname  out: set to null on entry; handed to soreceive() for the
                        *        sender's address only on a connectionless protocol whose
                        *        socket is not connected.
                        * mp     out: the received mbuf chain, or null on error.
                        * rep    the client request being waited for, or NULL.
                        *
                        * Returns 0 or an errno value.  EINTR is returned when the request
                        * already has a reply (r_mrep), the mount has no socket, or the
                        * request was soft-terminated.  For a client on a connection-oriented
                        * protocol, any error other than EINTR/ERESTART makes the routine
                        * disconnect, reconnect and start again.
    372   1.1      cgd  */
    373   1.1      cgd nfs_receive(so, aname, mp, rep)
    374   1.1      cgd 	register struct socket *so;
    375   1.1      cgd 	struct mbuf **aname;
    376   1.1      cgd 	struct mbuf **mp;
    377   1.1      cgd 	register struct nfsreq *rep;
    378   1.1      cgd {
    379   1.1      cgd 	struct uio auio;
    380   1.1      cgd 	struct iovec aio;
    381   1.1      cgd 	register struct mbuf *m;
    382   1.1      cgd 	struct mbuf *m2, *mnew, **mbp;
    383   1.1      cgd 	caddr_t fcp, tcp;
    384   1.1      cgd 	u_long len;
    385   1.1      cgd 	struct mbuf **getnam;
    386   1.1      cgd 	int error, siz, mlen, soflags, rcvflg;
    387   1.1      cgd 
    388   1.1      cgd 	/*
    389   1.1      cgd 	 * Set up arguments for soreceive()
    390   1.1      cgd 	 */
    391   1.1      cgd 	*mp = (struct mbuf *)0;
    392   1.1      cgd 	*aname = (struct mbuf *)0;
    393   1.1      cgd 	if (rep)
    394   1.1      cgd 		soflags = rep->r_nmp->nm_soflags;
    395   1.1      cgd 	else
    396   1.1      cgd 		soflags = so->so_proto->pr_flags;
    397   1.1      cgd 
    398   1.1      cgd 	/*
    399   1.1      cgd 	 * For reliable protocols, lock against other senders/receivers
    400   1.1      cgd 	 * in case a reconnect is necessary.
    401   1.1      cgd 	 * For SOCK_STREAM, first get the Record Mark to find out how much
    402   1.1      cgd 	 * more there is to get.
    403   1.1      cgd 	 * We must lock the socket against other receivers
    404   1.1      cgd 	 * until we have an entire rpc request/reply.
    405   1.1      cgd 	 */
    406   1.1      cgd 	if (soflags & PR_CONNREQUIRED) {
    407   1.1      cgd tryagain:
    408   1.1      cgd 		/*
    409   1.1      cgd 		 * Check for fatal errors and resending request.
    410   1.1      cgd 		 */
    411   1.1      cgd 		if (rep) {
    412   1.1      cgd 			/*
    413   1.1      cgd 			 * Ugh: If a reconnect attempt just happened, nm_so
    414   1.1      cgd 			 * would have changed. NULL indicates a failed
    415   1.1      cgd 			 * attempt that has essentially shut down this
    416   1.1      cgd 			 * mount point.
    417   1.1      cgd 			 */
    418   1.1      cgd 			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
    419   1.1      cgd 				(rep->r_flags & R_SOFTTERM))
    420   1.1      cgd 				return (EINTR);
                       			/*
                       			 * Send a fresh copy of the request for as long as it
                       			 * is flagged for resend.  nfs_send() clears the flag
                       			 * before sending and sets it again if the send would
                       			 * block.
                       			 */
    421   1.1      cgd 			while (rep->r_flags & R_MUSTRESEND) {
    422   1.1      cgd 				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
    423   1.1      cgd 				nfsstats.rpcretries++;
    424   1.1      cgd 				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
    425   1.1      cgd 					rep))
    426   1.1      cgd 					goto errout;
    427   1.1      cgd 			}
    428   1.1      cgd 		}
    429   1.1      cgd 		if ((soflags & PR_ATOMIC) == 0) {
                       			/* Byte stream: read the length word into "len" first. */
    430   1.1      cgd 			aio.iov_base = (caddr_t) &len;
    431   1.1      cgd 			aio.iov_len = sizeof(u_long);
    432   1.1      cgd 			auio.uio_iov = &aio;
    433   1.1      cgd 			auio.uio_iovcnt = 1;
    434   1.1      cgd 			auio.uio_segflg = UIO_SYSSPACE;
    435   1.1      cgd 			auio.uio_rw = UIO_READ;
    436   1.1      cgd 			auio.uio_procp = (struct proc *)0;
    437   1.1      cgd 			auio.uio_offset = 0;
    438   1.1      cgd 			auio.uio_resid = sizeof(u_long);
    439   1.1      cgd 			do {
    440   1.1      cgd 			    rcvflg = MSG_WAITALL;
    441   1.1      cgd 			    error = soreceive(so, (struct mbuf **)0, &auio,
    442   1.1      cgd 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
    443   1.1      cgd 			    if (error == EWOULDBLOCK && rep) {
    444   1.1      cgd 				if (rep->r_flags & R_SOFTTERM)
    445   1.1      cgd 					return (EINTR);
    446   1.1      cgd 				if (rep->r_flags & R_MUSTRESEND)
    447   1.1      cgd 					goto tryagain;
    448   1.1      cgd 			    }
    449   1.1      cgd 			} while (error == EWOULDBLOCK);
                       			/*
                       			 * A successful but incomplete read is treated as a
                       			 * broken connection (EPIPE).
                       			 * NOTE(review): the "%d" conversions below are given
                       			 * sizeof / u_long expressions; verify the widths match
                       			 * on the target.
                       			 */
    450   1.1      cgd 			if (!error && auio.uio_resid > 0) {
    451   1.1      cgd 			    if (rep)
    452   1.1      cgd 				log(LOG_INFO,
    453   1.1      cgd 				   "short receive (%d/%d) from nfs server %s\n",
    454   1.1      cgd 				   sizeof(u_long) - auio.uio_resid,
    455   1.1      cgd 				   sizeof(u_long),
    456   1.1      cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    457   1.1      cgd 			    error = EPIPE;
    458   1.1      cgd 			}
    459   1.1      cgd 			if (error)
    460   1.1      cgd 				goto errout;
                       			/*
                       			 * Convert to host byte order and mask off the top bit;
                       			 * what is left is used as the record's byte count.
                       			 * NOTE(review): the top bit is presumably the
                       			 * record-marking "last fragment" flag; records split
                       			 * over several fragments are not handled here -- confirm.
                       			 */
    461   1.1      cgd 			len = ntohl(len) & ~0x80000000;
    462   1.1      cgd 			/*
    463   1.1      cgd 			 * This is SERIOUS! We are out of sync with the sender
    464   1.1      cgd 			 * and forcing a disconnect/reconnect is all I can do.
    465   1.1      cgd 			 */
    466   1.1      cgd 			if (len > NFS_MAXPACKET) {
    467   1.1      cgd 			    if (rep)
    468   1.1      cgd 				log(LOG_ERR, "%s (%d) from nfs server %s\n",
    469   1.1      cgd 				    "impossible packet length",
    470   1.1      cgd 				    len,
    471   1.1      cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    472   1.1      cgd 			    error = EFBIG;
    473   1.1      cgd 			    goto errout;
    474   1.1      cgd 			}
                       			/*
                       			 * Now read the record body into an mbuf chain.  Unlike
                       			 * the length read above, this loop also retries on
                       			 * EINTR and ERESTART, so that a record that has been
                       			 * started is always read to the end.
                       			 */
    475   1.1      cgd 			auio.uio_resid = len;
    476   1.1      cgd 			do {
    477   1.1      cgd 			    rcvflg = MSG_WAITALL;
    478   1.1      cgd 			    error =  soreceive(so, (struct mbuf **)0,
    479   1.1      cgd 				&auio, mp, (struct mbuf **)0, &rcvflg);
    480   1.1      cgd 			} while (error == EWOULDBLOCK || error == EINTR ||
    481   1.1      cgd 				 error == ERESTART);
    482   1.1      cgd 			if (!error && auio.uio_resid > 0) {
    483   1.1      cgd 			    if (rep)
    484   1.1      cgd 				log(LOG_INFO,
    485   1.1      cgd 				   "short receive (%d/%d) from nfs server %s\n",
    486   1.1      cgd 				   len - auio.uio_resid, len,
    487   1.1      cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    488   1.1      cgd 			    error = EPIPE;
    489   1.1      cgd 			}
    490   1.1      cgd 		} else {
                       			/*
                       			 * Connection-oriented but record-preserving protocol:
                       			 * a single soreceive() returns a whole message.
                       			 * NOTE(review): only auio.uio_resid is initialised on
                       			 * this path; presumably soreceive() needs nothing else
                       			 * from the uio when it is given an mbuf pointer --
                       			 * confirm.
                       			 */
    491   1.1      cgd 			auio.uio_resid = len = 1000000;	/* Anything Big */
    492   1.1      cgd 			do {
    493   1.1      cgd 			    rcvflg = 0;
    494   1.1      cgd 			    error =  soreceive(so, (struct mbuf **)0,
    495   1.1      cgd 				&auio, mp, (struct mbuf **)0, &rcvflg);
    496   1.1      cgd 			    if (error == EWOULDBLOCK && rep) {
    497   1.1      cgd 				if (rep->r_flags & R_SOFTTERM)
    498   1.1      cgd 					return (EINTR);
    499   1.1      cgd 				if (rep->r_flags & R_MUSTRESEND)
    500   1.1      cgd 					goto tryagain;
    501   1.1      cgd 			    }
    502   1.1      cgd 			} while (error == EWOULDBLOCK);
    503   1.1      cgd 			if (!error && *mp == NULL)
    504   1.1      cgd 				error = EPIPE;
                       			/* len becomes the number of bytes actually received. */
    505   1.1      cgd 			len -= auio.uio_resid;
    506   1.1      cgd 		}
                       		/*
                       		 * Client-side recovery: for any error other than EINTR or
                       		 * ERESTART, drop what was read, log it (EPIPE is not logged
                       		 * here), tear the connection down, reconnect and start over.
                       		 * If the reconnect fails its error is returned below.
                       		 * NOTE(review): the inner "&& rep" test is redundant; rep is
                       		 * already known to be non-null inside this block.
                       		 */
    507   1.1      cgd errout:
    508   1.1      cgd 		if (error && rep && error != EINTR && error != ERESTART) {
    509   1.1      cgd 			m_freem(*mp);
    510   1.1      cgd 			*mp = (struct mbuf *)0;
    511   1.1      cgd 			if (error != EPIPE && rep)
    512   1.1      cgd 				log(LOG_INFO,
    513   1.1      cgd 				    "receive error %d from nfs server %s\n",
    514   1.1      cgd 				    error,
    515   1.1      cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    516   1.1      cgd 			nfs_disconnect(rep->r_nmp);
    517   1.1      cgd 			error = nfs_reconnect(rep, rep->r_nmp);
    518   1.1      cgd 			if (!error)
    519   1.1      cgd 				goto tryagain;
    520   1.1      cgd 		}
    521   1.1      cgd 	} else {
                       		/*
                       		 * Protocol that does not require a connection: ask for the
                       		 * sender's address only when the socket is not connected.
                       		 * As above, only auio.uio_resid is set up before the call.
                       		 */
    522   1.1      cgd 		if (so->so_state & SS_ISCONNECTED)
    523   1.1      cgd 			getnam = (struct mbuf **)0;
    524   1.1      cgd 		else
    525   1.1      cgd 			getnam = aname;
    526   1.1      cgd 		auio.uio_resid = len = 1000000;
    527   1.1      cgd 		do {
    528   1.1      cgd 			rcvflg = 0;
    529   1.1      cgd 			error =  soreceive(so, getnam, &auio, mp,
    530   1.1      cgd 				(struct mbuf **)0, &rcvflg);
    531   1.1      cgd 			if (error == EWOULDBLOCK && rep &&
    532   1.1      cgd 			    (rep->r_flags & R_SOFTTERM))
    533   1.1      cgd 				return (EINTR);
    534   1.1      cgd 		} while (error == EWOULDBLOCK);
    535   1.1      cgd 		len -= auio.uio_resid;
    536   1.1      cgd 	}
                       	/* On any error nothing is handed back to the caller. */
    537   1.1      cgd 	if (error) {
    538   1.1      cgd 		m_freem(*mp);
    539   1.1      cgd 		*mp = (struct mbuf *)0;
    540   1.1      cgd 	}
    541   1.1      cgd 	/*
    542   1.1      cgd 	 * Search for any mbufs that are not a multiple of 4 bytes long.
    543   1.1      cgd 	 * These could cause pointer alignment problems, so copy them to
    544   1.1      cgd 	 * well aligned mbufs.
                       	 *
                       	 * mbp always points at the link through which the current mbuf
                       	 * is reached, and len counts the bytes not yet walked over.  At
                       	 * the first mbuf that has a successor and an odd-sized length,
                       	 * the remainder of the chain (that mbuf included) is copied
                       	 * into newly allocated mbufs hooked in through mbp, the old
                       	 * mbufs are freed one by one, and the walk ends.
    545   1.1      cgd 	 */
    546   1.1      cgd 	m = *mp;
    547   1.1      cgd 	mbp = mp;
    548   1.1      cgd 	while (m) {
    549   1.1      cgd 		/*
    550   1.1      cgd 		 * All this for something that may never happen.
    551   1.1      cgd 		 */
    552   1.1      cgd 		if (m->m_next && (m->m_len & 0x3)) {
    553   1.1      cgd 			printf("nfs_rcv odd length!\n");
                       			/* mlen is the space left in the mbuf being filled (m2). */
    554   1.1      cgd 			mlen = 0;
    555   1.1      cgd 			while (m) {
    556   1.1      cgd 				fcp = mtod(m, caddr_t);
    557   1.1      cgd 				while (m->m_len > 0) {
    558   1.1      cgd 					if (mlen == 0) {
                       						/* Destination full (or none yet): start a new one, with a cluster if enough data remains. */
    559   1.1      cgd 						MGET(m2, M_WAIT, MT_DATA);
    560   1.1      cgd 						if (len >= MINCLSIZE)
    561   1.1      cgd 							MCLGET(m2, M_WAIT);
    562   1.1      cgd 						m2->m_len = 0;
    563   1.1      cgd 						mlen = M_TRAILINGSPACE(m2);
    564   1.1      cgd 						tcp = mtod(m2, caddr_t);
    565   1.1      cgd 						*mbp = m2;
    566   1.1      cgd 						mbp = &m2->m_next;
    567   1.1      cgd 					}
                       					/* Move as much as fits and advance both cursors. */
    568   1.1      cgd 					siz = MIN(mlen, m->m_len);
    569   1.1      cgd 					bcopy(fcp, tcp, siz);
    570   1.1      cgd 					m2->m_len += siz;
    571   1.1      cgd 					mlen -= siz;
    572   1.1      cgd 					len -= siz;
    573   1.1      cgd 					tcp += siz;
    574   1.1      cgd 					m->m_len -= siz;
    575   1.1      cgd 					fcp += siz;
    576   1.1      cgd 				}
                       				/* Source mbuf drained: free it and step to its successor. */
    577   1.1      cgd 				MFREE(m, mnew);
    578   1.1      cgd 				m = mnew;
    579   1.1      cgd 			}
    580   1.1      cgd 			break;
    581   1.1      cgd 		}
    582   1.1      cgd 		len -= m->m_len;
    583   1.1      cgd 		mbp = &m->m_next;
    584   1.1      cgd 		m = m->m_next;
    585   1.1      cgd 	}
    586   1.1      cgd 	return (error);
    587   1.1      cgd }
    588   1.1      cgd 
    589   1.1      cgd /*
    590   1.1      cgd  * Implement receipt of reply on a socket.
    591   1.1      cgd  * We must search through the list of received datagrams matching them
    592   1.1      cgd  * with outstanding requests using the xid, until ours is found.
    593   1.1      cgd  */
    594   1.1      cgd /* ARGSUSED */
    595   1.1      cgd nfs_reply(nmp, myrep)
    596   1.1      cgd 	struct nfsmount *nmp;
    597   1.1      cgd 	struct nfsreq *myrep;
    598   1.1      cgd {
    599   1.1      cgd 	register struct mbuf *m;
    600   1.1      cgd 	register struct nfsreq *rep;
    601   1.1      cgd 	register int error = 0;
    602   1.1      cgd 	u_long rxid;
    603   1.1      cgd 	struct mbuf *mp, *nam;
    604   1.1      cgd 	char *cp;
    605   1.1      cgd 	int cnt, xfer;
    606   1.1      cgd 
    607   1.1      cgd 	/*
    608   1.1      cgd 	 * Loop around until we get our own reply
    609   1.1      cgd 	 */
    610   1.1      cgd 	for (;;) {
    611   1.1      cgd 		/*
    612   1.1      cgd 		 * Lock against other receivers so that I don't get stuck in
    613   1.1      cgd 		 * sbwait() after someone else has received my reply for me.
    614   1.1      cgd 		 * Also necessary for connection based protocols to avoid
    615   1.1      cgd 		 * race conditions during a reconnect.
    616   1.1      cgd 		 */
    617   1.1      cgd 		nfs_solock(&nmp->nm_flag);
    618   1.1      cgd 		/* Already received, bye bye */
    619   1.1      cgd 		if (myrep->r_mrep != NULL) {
    620   1.1      cgd 			nfs_sounlock(&nmp->nm_flag);
    621   1.1      cgd 			return (0);
    622   1.1      cgd 		}
    623   1.1      cgd 		/*
    624   1.1      cgd 		 * Get the next Rpc reply off the socket
    625   1.1      cgd 		 */
    626   1.1      cgd 		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
    627   1.1      cgd 			nfs_sounlock(&nmp->nm_flag);
    628   1.1      cgd 
    629   1.1      cgd 			/*
    630   1.1      cgd 			 * Ignore routing errors on connectionless protocols??
    631   1.1      cgd 			 */
    632   1.1      cgd 			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
    633   1.1      cgd 				nmp->nm_so->so_error = 0;
    634   1.1      cgd 				continue;
    635   1.1      cgd 			}
    636   1.1      cgd 
    637   1.1      cgd 			/*
    638   1.1      cgd 			 * Otherwise cleanup and return a fatal error.
    639   1.1      cgd 			 */
    640   1.1      cgd 			if (myrep->r_flags & R_TIMING) {
    641   1.1      cgd 				myrep->r_flags &= ~R_TIMING;
    642   1.1      cgd 				nmp->nm_rtt = -1;
    643   1.1      cgd 			}
    644   1.1      cgd 			if (myrep->r_flags & R_SENT) {
    645   1.1      cgd 				myrep->r_flags &= ~R_SENT;
    646   1.1      cgd 				nmp->nm_sent--;
    647   1.1      cgd 			}
    648   1.1      cgd 			return (error);
    649   1.1      cgd 		}
    650   1.1      cgd 
    651   1.1      cgd 		/*
    652   1.1      cgd 		 * Get the xid and check that it is an rpc reply
    653   1.1      cgd 		 */
    654   1.1      cgd 		m = mp;
    655   1.1      cgd 		while (m && m->m_len == 0)
    656   1.1      cgd 			m = m->m_next;
    657   1.1      cgd 		if (m == NULL) {
    658   1.1      cgd 			nfsstats.rpcinvalid++;
    659   1.1      cgd 			m_freem(mp);
    660   1.1      cgd 			nfs_sounlock(&nmp->nm_flag);
    661   1.1      cgd 			continue;
    662   1.1      cgd 		}
    663   1.1      cgd 		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
    664   1.1      cgd 		/*
    665   1.1      cgd 		 * Loop through the request list to match up the reply
    666   1.1      cgd 		 * Iff no match, just drop the datagram
    667   1.1      cgd 		 */
    668   1.1      cgd 		m = mp;
    669   1.1      cgd 		rep = nfsreqh.r_next;
    670   1.1      cgd 		while (rep != &nfsreqh) {
    671   1.1      cgd 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
    672   1.1      cgd 				/* Found it.. */
    673   1.1      cgd 				rep->r_mrep = m;
    674   1.1      cgd 				/*
    675   1.1      cgd 				 * Update timing
    676   1.1      cgd 				 */
    677   1.1      cgd 				if (rep->r_flags & R_TIMING) {
    678   1.1      cgd 					nfs_updatetimer(rep->r_nmp);
    679   1.1      cgd 					rep->r_flags &= ~R_TIMING;
    680   1.1      cgd 					rep->r_nmp->nm_rtt = -1;
    681   1.1      cgd 				}
    682   1.1      cgd 				if (rep->r_flags & R_SENT) {
    683   1.1      cgd 					rep->r_flags &= ~R_SENT;
    684   1.1      cgd 					rep->r_nmp->nm_sent--;
    685   1.1      cgd 				}
    686   1.1      cgd 				break;
    687   1.1      cgd 			}
    688   1.1      cgd 			rep = rep->r_next;
    689   1.1      cgd 		}
    690   1.1      cgd 		nfs_sounlock(&nmp->nm_flag);
    691   1.1      cgd 		if (nam)
    692   1.1      cgd 			m_freem(nam);
    693   1.1      cgd 		/*
    694   1.1      cgd 		 * If not matched to a request, drop it.
    695   1.1      cgd 		 * If it's mine, get out.
    696   1.1      cgd 		 */
    697   1.1      cgd 		if (rep == &nfsreqh) {
    698   1.1      cgd 			nfsstats.rpcunexpected++;
    699   1.1      cgd 			m_freem(m);
    700   1.1      cgd 		} else if (rep == myrep)
    701   1.1      cgd 			return (0);
    702   1.1      cgd 	}
    703   1.1      cgd }
    704   1.1      cgd 
     705   1.1      cgd /*
     706   1.1      cgd  * nfs_request - goes something like this
     707   1.1      cgd  *	- fill in request struct
     708   1.1      cgd  *	- links it into list
     709   1.1      cgd  *	- calls nfs_send() for first transmit
     710   1.1      cgd  *	- calls nfs_reply() to wait for the reply
     711   1.1      cgd  *	- break down rpc header and return with nfs reply pointed to
     712   1.1      cgd  *	  by mrep or error
     713   1.1      cgd  * nb: always frees up mreq mbuf list
                         *
                         * Arguments, as used in the body:
                         *	vp, procp - saved in the request as r_vp and r_procp
                         *	mreq	  - request mbuf chain; its m_pkthdr.len is set here
                         *	xid	  - saved in the request as r_xid
                         *	procnum	  - index into nonidempotent[] and compressrequest[]
                         *	tryhard	  - nonzero makes an NFSMNT_SPONGY mount use the
                         *		    NFS_MAXREXMIT + 1 retry limit instead of nm_retry
                         *	mp	  - mount point, converted with VFSTONFS()
                         *	mrp, mdp, dposp - written only on success: the reply chain,
                         *		    the current mbuf and the data position reached
                         *		    once the rpc header has been parsed
                         *
                         * Returns 0 on success.  Otherwise returns the error from nfs_send()
                         * or nfs_reply(), EOPNOTSUPP or EACCES for an rpc_msgdenied reply,
                         * EPROTONOSUPPORT when the word after the verifier is nonzero, or
                         * the decoded value of the following status word when that is nonzero.
     714   1.1      cgd  */
     715   1.1      cgd nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
     716   1.1      cgd 	struct vnode *vp;
     717   1.1      cgd 	struct mbuf *mreq;
     718   1.1      cgd 	u_long xid;
     719   1.1      cgd 	int procnum;
     720   1.1      cgd 	struct proc *procp;
     721   1.1      cgd 	int tryhard;
     722   1.1      cgd 	struct mount *mp;
     723   1.1      cgd 	struct mbuf **mrp;
     724   1.1      cgd 	struct mbuf **mdp;
     725   1.1      cgd 	caddr_t *dposp;
     726   1.1      cgd {
     727   1.1      cgd 	register struct mbuf *m, *mrep;
     728   1.1      cgd 	register struct nfsreq *rep;
     729   1.1      cgd 	register u_long *tl;
     730   1.1      cgd 	register int len;
     731   1.1      cgd 	struct nfsmount *nmp;
     732   1.1      cgd 	struct mbuf *md;
     733   1.1      cgd 	struct nfsreq *reph;
     734   1.1      cgd 	caddr_t dpos;
     735   1.1      cgd 	char *cp2;
     736   1.1      cgd 	int t1;
     737   1.1      cgd 	int s, compressed;
     738   1.1      cgd 	int error = 0;
     739   1.1      cgd 
     740   1.1      cgd 	nmp = VFSTONFS(mp);
     741   1.1      cgd 	m = mreq;
     742   1.1      cgd 	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
     743   1.1      cgd 	rep->r_xid = xid;
     744   1.1      cgd 	rep->r_nmp = nmp;
     745   1.1      cgd 	rep->r_vp = vp;
     746   1.1      cgd 	rep->r_procp = procp;
     747   1.1      cgd 	if ((nmp->nm_flag & NFSMNT_SOFT) ||
     748   1.1      cgd 	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
     749   1.1      cgd 		rep->r_retry = nmp->nm_retry;
     750   1.1      cgd 	else
     751   1.1      cgd 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
     752   1.1      cgd 	rep->r_flags = rep->r_rexmit = 0;
     753   1.1      cgd 	/*
     754   1.1      cgd 	 * Three cases:
     755   1.1      cgd 	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
     756   1.1      cgd 	 * - idempotent requests on SOCK_DGRAM use 0
     757   1.1      cgd 	 * - Reliable transports, NFS_RELIABLETIMEO
     758   1.1      cgd 	 *   Timeouts are still done on reliable transports to ensure detection
     759   1.1      cgd 	 *   of excessive connection delay.
     760   1.1      cgd 	 */
     761   1.1      cgd 	if (nmp->nm_sotype != SOCK_DGRAM)
     762   1.1      cgd 		rep->r_timerinit = -NFS_RELIABLETIMEO;
     763   1.1      cgd 	else if (nonidempotent[procnum])
     764   1.1      cgd 		rep->r_timerinit = -NFS_MINIDEMTIMEO;
     765   1.1      cgd 	else
     766   1.1      cgd 		rep->r_timerinit = 0;
     767   1.1      cgd 	rep->r_timer = rep->r_timerinit;
     768   1.1      cgd 	rep->r_mrep = NULL;
                        	/* Total the chain length and record it in the packet header. */
     769   1.1      cgd 	len = 0;
     770   1.1      cgd 	while (m) {
     771   1.1      cgd 		len += m->m_len;
     772   1.1      cgd 		m = m->m_next;
     773   1.1      cgd 	}
     774   1.1      cgd 	mreq->m_pkthdr.len = len;
     775   1.1      cgd 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
                        	/*
                        	 * The request counts as compressed only when nfs_compress()
                        	 * hands back a different chain; len is then reloaded from it.
                        	 */
     776   1.1      cgd 	compressed = 0;
     777   1.1      cgd 	m = mreq;
     778   1.1      cgd 	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
     779   1.1      cgd 		mreq = nfs_compress(mreq);
     780   1.1      cgd 		if (mreq != m) {
     781   1.1      cgd 			len = mreq->m_pkthdr.len;
     782   1.1      cgd 			compressed++;
     783   1.1      cgd 		}
     784   1.1      cgd 	}
     785   1.1      cgd 	/*
     786   1.1      cgd 	 * For non-atomic protocols, insert a Sun RPC Record Mark.
     787   1.1      cgd 	 */
     788   1.1      cgd 	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
     789   1.1      cgd 		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
     790   1.1      cgd 		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
     791   1.1      cgd 	}
     792   1.1      cgd 	rep->r_mreq = mreq;
     793   1.1      cgd 
     794   1.1      cgd 	/*
     795   1.1      cgd 	 * Do the client side RPC.
     796   1.1      cgd 	 */
     797   1.1      cgd 	nfsstats.rpcrequests++;
     798   1.1      cgd 	/*
     799   1.1      cgd 	 * Chain request into list of outstanding requests. Be sure
     800   1.1      cgd 	 * to put it LAST so timer finds oldest requests first.
     801   1.1      cgd 	 */
     802   1.1      cgd 	s = splnet();
     803   1.1      cgd 	reph = &nfsreqh;
     804   1.1      cgd 	reph->r_prev->r_next = rep;
     805   1.1      cgd 	rep->r_prev = reph->r_prev;
     806   1.1      cgd 	reph->r_prev = rep;
     807   1.1      cgd 	rep->r_next = reph;
     808   1.1      cgd 	/*
     809   1.1      cgd 	 * If backing off another request or avoiding congestion, don't
     810   1.1      cgd 	 * send this one now but let timer do it. If not timing a request,
     811   1.1      cgd 	 * do it now.
     812   1.1      cgd 	 */
     813   1.1      cgd 	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
     814   1.1      cgd 	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
     815   1.1      cgd 		nmp->nm_sent++;
     816   1.1      cgd 		rep->r_flags |= R_SENT;
                        		/* nm_rtt == -1 means no request is being timed; claim it. */
     817   1.1      cgd 		if (nmp->nm_rtt == -1) {
     818   1.1      cgd 			nmp->nm_rtt = 0;
     819   1.1      cgd 			rep->r_flags |= R_TIMING;
     820   1.1      cgd 		}
     821   1.1      cgd 		splx(s);
                        		/* Send a copy; the original stays in r_mreq for the timer. */
     822   1.1      cgd 		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
     823   1.1      cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED)
     824   1.1      cgd 			nfs_solock(&nmp->nm_flag);
     825   1.1      cgd 		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
     826   1.1      cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED)
     827   1.1      cgd 			nfs_sounlock(&nmp->nm_flag);
     828   1.1      cgd 		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
     829   1.1      cgd 			nmp->nm_so->so_error = error = 0;
     830   1.1      cgd 	} else
     831   1.1      cgd 		splx(s);
     832   1.1      cgd 
                        	/*
                        	 * NOTE(review): when nfs_send() fails with an error that is not
                        	 * ignored above, nothing in this function clears R_SENT or
                        	 * R_TIMING or restores nm_sent and nm_rtt before the request is
                        	 * freed.  Confirm whether nfs_send() undoes them.
                        	 */
     833   1.1      cgd 	/*
     834   1.1      cgd 	 * Wait for the reply from our send or the timer's.
     835   1.1      cgd 	 */
     836   1.1      cgd 	if (!error)
     837   1.1      cgd 		error = nfs_reply(nmp, rep);
     838   1.1      cgd 
     839   1.1      cgd 	/*
     840   1.1      cgd 	 * RPC done, unlink the request.
     841   1.1      cgd 	 */
     842   1.1      cgd 	s = splnet();
     843   1.1      cgd 	rep->r_prev->r_next = rep->r_next;
     844   1.1      cgd 	rep->r_next->r_prev = rep->r_prev;
     845   1.1      cgd 	splx(s);
     846   1.1      cgd 
     847   1.1      cgd 	/*
     848   1.1      cgd 	 * If there was a successful reply and a tprintf msg.
     849   1.1      cgd 	 * tprintf a response.
     850   1.1      cgd 	 */
     851   1.1      cgd 	if (!error && (rep->r_flags & R_TPRINTFMSG))
     852   1.1      cgd 		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
     853   1.1      cgd 		    "is alive again");
                        	/* Save the reply chain before the request structure is freed. */
     854   1.1      cgd 	m_freem(rep->r_mreq);
     855   1.1      cgd 	mrep = rep->r_mrep;
     856   1.1      cgd 	FREE((caddr_t)rep, M_NFSREQ);
     857   1.1      cgd 	if (error)
     858   1.1      cgd 		return (error);
     859   1.1      cgd 
     860   1.1      cgd 	if (compressed)
     861   1.1      cgd 		mrep = nfs_uncompress(mrep);
     862   1.1      cgd 	md = mrep;
     863   1.1      cgd 	/*
     864   1.1      cgd 	 * break down the rpc header and check if ok
     865   1.1      cgd 	 */
                        	/*
                        	 * NOTE(review): nfsm_disect() and nfsm_adv() are macros defined
                        	 * elsewhere; the nfsmout label at the end of this function is
                        	 * presumably their failure exit - confirm.
                        	 */
     866   1.1      cgd 	dpos = mtod(md, caddr_t);
     867   1.1      cgd 	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
                        	/* Skip the first two words; the third is tested against rpc_msgdenied. */
     868   1.1      cgd 	tl += 2;
     869   1.1      cgd 	if (*tl++ == rpc_msgdenied) {
     870   1.1      cgd 		if (*tl == rpc_mismatch)
     871   1.1      cgd 			error = EOPNOTSUPP;
     872   1.1      cgd 		else
     873   1.1      cgd 			error = EACCES;
     874   1.1      cgd 		m_freem(mrep);
     875   1.1      cgd 		return (error);
     876   1.1      cgd 	}
     877   1.1      cgd 	/*
     878   1.1      cgd 	 * skip over the auth_verf, someday we may want to cache auth_short's
     879   1.1      cgd 	 * for nfs_reqhead(), but for now just dump it
     880   1.1      cgd 	 */
     881   1.1      cgd 	if (*++tl != 0) {
     882   1.1      cgd 		len = nfsm_rndup(fxdr_unsigned(long, *tl));
     883   1.1      cgd 		nfsm_adv(len);
     884   1.1      cgd 	}
     885   1.1      cgd 	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
     886   1.1      cgd 	/* 0 == ok */
     887   1.1      cgd 	if (*tl == 0) {
     888   1.1      cgd 		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
     889   1.1      cgd 		if (*tl != 0) {
     890   1.1      cgd 			error = fxdr_unsigned(int, *tl);
     891   1.1      cgd 			m_freem(mrep);
     892   1.1      cgd 			return (error);
     893   1.1      cgd 		}
     894   1.1      cgd 		*mrp = mrep;
     895   1.1      cgd 		*mdp = md;
     896   1.1      cgd 		*dposp = dpos;
     897   1.1      cgd 		return (0);
     898   1.1      cgd 	}
     899   1.1      cgd 	m_freem(mrep);
     900   1.1      cgd 	return (EPROTONOSUPPORT);
     901   1.1      cgd nfsmout:
     902   1.1      cgd 	return (error);
     903   1.1      cgd }
    904   1.1      cgd 
     905   1.1      cgd /*
     906   1.1      cgd  * Get a request for the server main loop
     907   1.1      cgd  * - receive a request via. nfs_receive()
     908   1.1      cgd  * - verify it
     909   1.1      cgd  * - fill in the cred struct.
                         *
                         * nam is filled in by nfs_receive(); on sockets without
                         * PR_CONNREQUIRED it is checked with nfs_badnam() against msk and mtch.
                         * *retxid is set as soon as the first header word has been read.
                         * *wascomp is set to 1 when nfs_uncompress() returns a different
                         * chain, else to 0.
                         *
                         * When 0 is returned, *repstat is 0 for an acceptable call.  For a
                         * rejected call it is ERPCMISMATCH, EPROGUNAVAIL, EPROGMISMATCH or
                         * EPROCUNAVAIL and *procnum is NFSPROC_NOOP.  On those returns, and
                         * for NFSPROC_NULL, only *mrp is set; *mdp and *dposp are set only
                         * after the credentials and verifier have been parsed.
                         *
                         * Returns the nfs_receive() error, or EBADRPC (after freeing the
                         * request chain) when a length field is out of range.
     910   1.1      cgd  */
     911   1.1      cgd nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
     912  1.10      cgd 	msk, mtch, wascomp, repstat)
     913   1.1      cgd 	struct socket *so;
     914   1.1      cgd 	u_long prog;
     915   1.1      cgd 	u_long vers;
     916   1.1      cgd 	int maxproc;
     917   1.1      cgd 	struct mbuf **nam;
     918   1.1      cgd 	struct mbuf **mrp;
     919   1.1      cgd 	struct mbuf **mdp;
     920   1.1      cgd 	caddr_t *dposp;
     921   1.1      cgd 	u_long *retxid;
     922   1.1      cgd 	u_long *procnum;
     923   1.1      cgd 	register struct ucred *cr;
     924   1.1      cgd 	struct mbuf *msk, *mtch;
     925  1.10      cgd 	int *wascomp, *repstat;
     926   1.1      cgd {
     927   1.1      cgd 	register int i;
     928   1.1      cgd 	register u_long *tl;
     929   1.1      cgd 	register long t1;
     930   1.1      cgd 	caddr_t dpos, cp2;
     931   1.1      cgd 	int error = 0;
     932   1.1      cgd 	struct mbuf *mrep, *md;
     933   1.1      cgd 	int len;
     934   1.1      cgd 
     935  1.10      cgd 	*repstat = 0;
     936   1.1      cgd 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
     937   1.1      cgd 		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
     938   1.1      cgd 	} else {
                        		/*
                        		 * Keep receiving until nfs_badnam() accepts the sender.
                        		 * A rejected request and its address mbuf are freed at
                        		 * the top of the next pass.
                        		 */
     939   1.1      cgd 		mrep = (struct mbuf *)0;
     940   1.1      cgd 		do {
     941   1.1      cgd 			if (mrep) {
     942   1.1      cgd 				m_freem(*nam);
     943   1.1      cgd 				m_freem(mrep);
     944   1.1      cgd 			}
     945   1.1      cgd 			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
     946   1.1      cgd 		} while (!error && nfs_badnam(*nam, msk, mtch));
     947   1.1      cgd 	}
     948   1.1      cgd 	if (error)
     949   1.1      cgd 		return (error);
     950   1.1      cgd 	md = mrep;
     951   1.1      cgd 	mrep = nfs_uncompress(mrep);
     952   1.1      cgd 	if (mrep != md) {
     953   1.1      cgd 		*wascomp = 1;
     954   1.1      cgd 		md = mrep;
     955   1.1      cgd 	} else
     956   1.1      cgd 		*wascomp = 0;
                        	/*
                        	 * NOTE(review): nfsm_disect() and nfsm_adv() are macros defined
                        	 * elsewhere; the nfsmout label at the end of this function is
                        	 * presumably their failure exit - confirm.
                        	 */
     957   1.1      cgd 	dpos = mtod(mrep, caddr_t);
     958   1.1      cgd 	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
     959   1.8       ws 	*retxid = fxdr_unsigned(u_long, *tl++);
     960  1.10      cgd 	if (*tl++ != rpc_call || *tl++ != rpc_vers) {
     961   1.2      cgd 		*mrp = mrep;
     962   1.2      cgd 		*procnum = NFSPROC_NOOP;
     963   1.2      cgd 		*repstat = ERPCMISMATCH;
     964   1.2      cgd 		return (0);
     965   1.1      cgd 	}
     966   1.1      cgd 	if (*tl++ != prog) {
     967  1.10      cgd 		*mrp = mrep;
     968   1.2      cgd 		*procnum = NFSPROC_NOOP;
     969   1.2      cgd 		*repstat = EPROGUNAVAIL;
     970   1.2      cgd 		return (0);
     971   1.1      cgd 	}
     972   1.1      cgd 	if (*tl++ != vers) {
     973  1.10      cgd 		*mrp = mrep;
     974   1.2      cgd 		*procnum = NFSPROC_NOOP;
     975   1.2      cgd 		*repstat = EPROGMISMATCH;
     976   1.2      cgd 		return (0);
     977   1.1      cgd 	}
     978   1.1      cgd 	*procnum = fxdr_unsigned(u_long, *tl++);
     979   1.1      cgd 	if (*procnum == NFSPROC_NULL) {
     980   1.1      cgd 		*mrp = mrep;
     981   1.1      cgd 		return (0);
     982   1.1      cgd 	}
     983   1.1      cgd 	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
     984  1.10      cgd 		*mrp = mrep;
     985   1.2      cgd 		*procnum = NFSPROC_NOOP;
     986   1.2      cgd 		*repstat = EPROCUNAVAIL;
     987   1.2      cgd 		return (0);
     988   1.1      cgd 	}
                        	/* First length word is bounded by RPCAUTH_MAXSIZ. */
     989   1.1      cgd 	len = fxdr_unsigned(int, *tl++);
     990   1.1      cgd 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
     991   1.1      cgd 		m_freem(mrep);
     992   1.1      cgd 		return (EBADRPC);
     993   1.1      cgd 	}
                        	/*
                        	 * One word is stepped over, then a length bounded by
                        	 * NFS_MAXNAMLEN is read and that many bytes (rounded up) are
                        	 * skipped.
                        	 */
     994   1.1      cgd 	len = fxdr_unsigned(int, *++tl);
     995   1.1      cgd 	if (len < 0 || len > NFS_MAXNAMLEN) {
     996   1.1      cgd 		m_freem(mrep);
     997   1.1      cgd 		return (EBADRPC);
     998   1.1      cgd 	}
     999   1.1      cgd 	nfsm_adv(nfsm_rndup(len));
    1000   1.1      cgd 	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
    1001   1.1      cgd 	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
    1002   1.1      cgd 	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
    1003   1.1      cgd 	len = fxdr_unsigned(int, *tl);
    1004   1.1      cgd 	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
    1005   1.1      cgd 		m_freem(mrep);
    1006   1.1      cgd 		return (EBADRPC);
    1007   1.1      cgd 	}
                        	/*
                        	 * Map len group words plus two more words.  Groups are stored
                        	 * starting at cr_groups[1]; entries at index NGROUPS and beyond
                        	 * are read past without being stored.
                        	 */
    1008   1.1      cgd 	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
    1009   1.1      cgd 	for (i = 1; i <= len; i++)
    1010   1.1      cgd 		if (i < NGROUPS)
    1011   1.1      cgd 			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
    1012   1.1      cgd 		else
    1013   1.1      cgd 			tl++;
    1014   1.1      cgd 	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
    1015   1.1      cgd 	/*
    1016   1.1      cgd 	 * Do we have any use for the verifier.
    1017   1.1      cgd 	 * According to the "Remote Procedure Call Protocol Spec." it
    1018   1.1      cgd 	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
    1019   1.1      cgd 	 * For now, just skip over it
    1020   1.1      cgd 	 */
    1021   1.1      cgd 	len = fxdr_unsigned(int, *++tl);
    1022   1.1      cgd 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
    1023   1.1      cgd 		m_freem(mrep);
    1024   1.1      cgd 		return (EBADRPC);
    1025   1.1      cgd 	}
    1026   1.1      cgd 	if (len > 0)
    1027   1.1      cgd 		nfsm_adv(nfsm_rndup(len));
    1028   1.1      cgd 	*mrp = mrep;
    1029   1.1      cgd 	*mdp = md;
    1030   1.1      cgd 	*dposp = dpos;
    1031   1.1      cgd 	return (0);
    1032   1.1      cgd nfsmout:
    1033   1.1      cgd 	return (error);
    1034   1.1      cgd }
   1035   1.1      cgd 
    1036   1.1      cgd /*
    1037   1.1      cgd  * Generate the rpc reply header
    1038   1.1      cgd  * siz arg. is used to decide if adding a cluster is worthwhile
                         *
                         * retxid is written as the first header word; err selects the rest:
                         *	ERPCMISMATCH	- rpc_msgdenied, rpc_mismatch, then 2 and 2
                         *	anything else	- rpc_msgaccepted, two zero words, then a
                         *			  status word chosen by the switch below
                         * *mrq, *mbp and *bposp receive mreq, mb and bpos.
                         * NOTE(review): nfsm_build() is a macro defined elsewhere and
                         * presumably advances mb and bpos - confirm.
                         * nfsstats.srvrpc_errs is bumped when err is neither 0 nor VNOVAL.
                         * Always returns 0.
    1039   1.1      cgd  */
    1040   1.1      cgd nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
    1041   1.1      cgd 	int siz;
    1042   1.1      cgd 	u_long retxid;
    1043   1.1      cgd 	int err;
    1044   1.1      cgd 	struct mbuf **mrq;
    1045   1.1      cgd 	struct mbuf **mbp;
    1046   1.1      cgd 	caddr_t *bposp;
    1047   1.1      cgd {
    1048   1.1      cgd 	register u_long *tl;
    1049   1.1      cgd 	register long t1;
    1050   1.1      cgd 	caddr_t bpos;
    1051   1.1      cgd 	struct mbuf *mreq, *mb, *mb2;
    1052   1.1      cgd 
    1053   1.1      cgd 	NFSMGETHDR(mreq);
    1054   1.1      cgd 	mb = mreq;
    1055   1.1      cgd 	if ((siz+RPC_REPLYSIZ) > MHLEN)
    1056   1.1      cgd 		MCLGET(mreq, M_WAIT);
                        	/* The fixed part of the header is six words, filled in below. */
    1057   1.1      cgd 	tl = mtod(mreq, u_long *);
    1058   1.1      cgd 	mreq->m_len = 6*NFSX_UNSIGNED;
    1059   1.1      cgd 	bpos = ((caddr_t)tl)+mreq->m_len;
    1060   1.8       ws 	*tl++ = txdr_unsigned(retxid);
    1061   1.1      cgd 	*tl++ = rpc_reply;
    1062   1.1      cgd 	if (err == ERPCMISMATCH) {
    1063   1.1      cgd 		*tl++ = rpc_msgdenied;
    1064   1.1      cgd 		*tl++ = rpc_mismatch;
    1065   1.1      cgd 		*tl++ = txdr_unsigned(2);
    1066   1.1      cgd 		*tl = txdr_unsigned(2);
    1067   1.1      cgd 	} else {
    1068   1.1      cgd 		*tl++ = rpc_msgaccepted;
    1069   1.1      cgd 		*tl++ = 0;
    1070   1.1      cgd 		*tl++ = 0;
    1071   1.1      cgd 		switch (err) {
    1072   1.1      cgd 		case EPROGUNAVAIL:
    1073   1.1      cgd 			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
    1074   1.1      cgd 			break;
    1075   1.1      cgd 		case EPROGMISMATCH:
    1076   1.1      cgd 			*tl = txdr_unsigned(RPC_PROGMISMATCH);
    1077   1.1      cgd 			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
    1078   1.1      cgd 			*tl++ = txdr_unsigned(2);
    1079   1.1      cgd 			*tl = txdr_unsigned(2);	/* someday 3 */
    1080   1.1      cgd 			break;
    1081   1.1      cgd 		case EPROCUNAVAIL:
    1082   1.1      cgd 			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
    1083   1.1      cgd 			break;
    1084   1.1      cgd 		default:
                        			/*
                        			 * Status word 0, followed by one extra word holding
                        			 * err unless err is VNOVAL.
                        			 */
    1085   1.1      cgd 			*tl = 0;
    1086   1.1      cgd 			if (err != VNOVAL) {
    1087   1.1      cgd 				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
    1088   1.1      cgd 				*tl = txdr_unsigned(err);
    1089   1.1      cgd 			}
    1090   1.1      cgd 			break;
    1091   1.1      cgd 		};
    1092   1.1      cgd 	}
    1093   1.1      cgd 	*mrq = mreq;
    1094   1.1      cgd 	*mbp = mb;
    1095   1.1      cgd 	*bposp = bpos;
    1096   1.1      cgd 	if (err != 0 && err != VNOVAL)
    1097   1.1      cgd 		nfsstats.srvrpc_errs++;
    1098   1.1      cgd 	return (0);
    1099   1.1      cgd }
   1100   1.1      cgd 
    1101   1.1      cgd /*
    1102   1.1      cgd  * Nfs timer routine
    1103   1.1      cgd  * Scan the nfsreq list and retransmit any requests that have timed out
    1104   1.1      cgd  * To avoid retransmission attempts on STREAM sockets (in the future) make
    1105   1.1      cgd  * sure to set the r_retry field to 0 (implies nm_retry == 0).
                         *
                         * The scan runs between splnet() and splx().  Requests that already
                         * have a reply, are marked R_SOFTTERM, or whose mount has no socket
                         * are skipped.  The routine reschedules itself through timeout()
                         * with an interval of hz/NFS_HZ.
    1106   1.1      cgd  */
    1107   1.7  mycroft void
    1108   1.1      cgd nfs_timer()
    1109   1.1      cgd {
    1110   1.1      cgd 	register struct nfsreq *rep;
    1111   1.1      cgd 	register struct mbuf *m;
    1112   1.1      cgd 	register struct socket *so;
    1113   1.1      cgd 	register struct nfsmount *nmp;
    1114   1.1      cgd 	int s, error;
    1115   1.1      cgd 
    1116   1.1      cgd 	s = splnet();
    1117   1.1      cgd 	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
    1118   1.1      cgd 		nmp = rep->r_nmp;
    1119   1.1      cgd 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
    1120   1.1      cgd 		    (so = nmp->nm_so) == NULL)
    1121   1.1      cgd 			continue;
                        		/* On NFSMNT_INT mounts a pending signal ends the request. */
    1122   1.1      cgd 		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
    1123   1.1      cgd 			rep->r_flags |= R_SOFTTERM;
    1124   1.1      cgd 			continue;
    1125   1.1      cgd 		}
    1126   1.1      cgd 		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
    1127   1.1      cgd 			nmp->nm_rtt++;
    1128   1.1      cgd 		/* If not timed out */
    1129   1.1      cgd 		if (++rep->r_timer < nmp->nm_rto)
    1130   1.1      cgd 			continue;
    1131   1.1      cgd 		/* Do backoff and save new timeout in mount */
    1132   1.1      cgd 		if (rep->r_flags & R_TIMING) {
    1133   1.1      cgd 			nfs_backofftimer(nmp);
    1134   1.1      cgd 			rep->r_flags &= ~R_TIMING;
    1135   1.1      cgd 			nmp->nm_rtt = -1;
    1136   1.1      cgd 		}
    1137   1.1      cgd 		if (rep->r_flags & R_SENT) {
    1138   1.1      cgd 			rep->r_flags &= ~R_SENT;
    1139   1.1      cgd 			nmp->nm_sent--;
    1140   1.1      cgd 		}
    1141   1.1      cgd 
    1142   1.1      cgd 		/*
    1143   1.1      cgd 		 * Check for too many retries on soft mount.
    1144   1.1      cgd 		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
    1145   1.1      cgd 		 */
                        		/* r_rexmit is clipped to NFS_MAXREXMIT before it is compared. */
    1146   1.1      cgd 		if (++rep->r_rexmit > NFS_MAXREXMIT)
    1147   1.1      cgd 			rep->r_rexmit = NFS_MAXREXMIT;
    1148   1.1      cgd 
    1149   1.1      cgd 		/*
    1150   1.1      cgd 		 * Check for server not responding
    1151   1.1      cgd 		 */
    1152   1.1      cgd 		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
    1153   1.1      cgd 		     rep->r_rexmit > NFS_FISHY) {
    1154   1.1      cgd 			nfs_msg(rep->r_procp,
    1155   1.1      cgd 			    nmp->nm_mountp->mnt_stat.f_mntfromname,
    1156   1.1      cgd 			    "not responding");
    1157   1.1      cgd 			rep->r_flags |= R_TPRINTFMSG;
    1158   1.1      cgd 		}
    1159   1.1      cgd 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
    1160   1.1      cgd 			nfsstats.rpctimeouts++;
    1161   1.1      cgd 			rep->r_flags |= R_SOFTTERM;
    1162   1.1      cgd 			continue;
    1163   1.1      cgd 		}
                        		/* Only SOCK_DGRAM requests are retransmitted from here. */
    1164   1.1      cgd 		if (nmp->nm_sotype != SOCK_DGRAM)
    1165   1.1      cgd 			continue;
    1166   1.1      cgd 
    1167   1.1      cgd 		/*
    1168   1.1      cgd 		 * If there is enough space and the window allows..
    1169   1.1      cgd 		 *	Resend it
    1170   1.1      cgd 		 */
                        		/*
                        		 * The copy uses M_DONTWAIT; if any of the three tests
                        		 * fails nothing is sent on this pass.  With NFSMNT_NOCONN
                        		 * clear no address is passed to PRU_SEND, otherwise
                        		 * nm_nam is passed.
                        		 */
    1171   1.1      cgd 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
    1172   1.1      cgd 		       nmp->nm_sent < nmp->nm_window &&
    1173   1.1      cgd 		       (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
    1174   1.1      cgd 			nfsstats.rpcretries++;
    1175   1.1      cgd 			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
    1176   1.1      cgd 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
    1177   1.1      cgd 			    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
    1178   1.1      cgd 			else
    1179   1.1      cgd 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
    1180   1.1      cgd 			    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
    1181   1.1      cgd 			if (error) {
    1182   1.1      cgd 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
    1183   1.1      cgd 					so->so_error = 0;
    1184   1.1      cgd 			} else {
    1185   1.1      cgd 				/*
    1186   1.1      cgd 				 * We need to time the request even though we
    1187   1.1      cgd 				 * are retransmitting.
    1188   1.1      cgd 				 */
    1189   1.1      cgd 				nmp->nm_rtt = 0;
    1190   1.1      cgd 				nmp->nm_sent++;
    1191   1.1      cgd 				rep->r_flags |= (R_SENT|R_TIMING);
    1192   1.1      cgd 				rep->r_timer = rep->r_timerinit;
    1193   1.1      cgd 			}
    1194   1.1      cgd 		}
    1195   1.1      cgd 	}
    1196   1.1      cgd 	splx(s);
    1197   1.1      cgd 	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
    1198   1.1      cgd }
   1199   1.1      cgd 
   1200   1.1      cgd /*
   1201   1.1      cgd  * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
   1202   1.1      cgd  * used here. The timer state is held in the nfsmount structure and
   1203   1.1      cgd  * a single request is used to clock the response. When successful
   1204   1.1      cgd  * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
   1205   1.1      cgd  * is done by nfs_backofftimer. We also log failure messages in these
   1206   1.1      cgd  * routines.
   1207   1.1      cgd  *
   1208   1.1      cgd  * Congestion variables are held in the nfshost structure which
   1209   1.1      cgd  * is referenced by nfsmounts and shared per-server. This separation
   1210   1.1      cgd  * makes it possible to do per-mount timing which allows varying disk
   1211   1.1      cgd  * access times to be dealt with, while preserving a network oriented
   1212   1.1      cgd  * congestion control scheme.
   1213   1.1      cgd  *
   1214   1.1      cgd  * The windowing implements the Jacobson/Karels slowstart algorithm
   1215   1.1      cgd  * with adjusted scaling factors. We start with one request, then send
   1216   1.1      cgd  * 4 more after each success until the ssthresh limit is reached, then
   1217   1.1      cgd  * we increment at a rate proportional to the window. On failure, we
   1218   1.1      cgd  * remember 3/4 the current window and clamp the send limit to 1. Note
   1219   1.1      cgd  * ICMP source quench is not reflected in so->so_error so we ignore that
   1220   1.1      cgd  * for now.
   1221   1.1      cgd  *
   1222   1.1      cgd  * NFS behaves much more like a transport protocol with these changes,
   1223   1.1      cgd  * shedding the teenage pedal-to-the-metal tendencies of "other"
   1224   1.1      cgd  * implementations.
   1225   1.1      cgd  *
   1226   1.1      cgd  * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
   1227   1.1      cgd  */
   1228   1.1      cgd 
    1229   1.1      cgd /*
    1230   1.1      cgd  * The TCP algorithm was not forgiving enough. Because the NFS server
    1231   1.1      cgd  * responds only after performing lookups/diskio/etc, we have to be
    1232   1.1      cgd  * more prepared to accept a spiky variance. The TCP algorithm is:
    1233   1.1      cgd  * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
                         *
                         * NFS_RTO instead adds nm_srtt >> 3 to the whole of nm_rttvar.
                         * nfs_updatetimer() adjusts nm_srtt by rtt - (nm_srtt >> 3) and
                         * nm_rttvar by |delta| - (nm_rttvar >> 2) on each sample.
    1234   1.1      cgd  */
    1235   1.1      cgd #define NFS_RTO(nmp)	(((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
    1236   1.1      cgd 
                        /*
                         * Fold the round trip time held in nmp->nm_rtt into the smoothed
                         * estimate (nm_srtt, nm_rttvar), recompute nm_rto clipped to the
                         * range NFS_MINTIMEO..NFS_MAXTIMEO, and grow nm_window up to
                         * NFS_MAXWINDOW.  If nm_rexmit or nm_currexmit is nonzero, both are
                         * cleared and nothing else is updated.
                         */
    1237   1.1      cgd nfs_updatetimer(nmp)
    1238   1.1      cgd 	register struct nfsmount *nmp;
    1239   1.1      cgd {
    1240   1.1      cgd 
    1241   1.1      cgd 	/* If retransmitted, clear and return */
    1242   1.1      cgd 	if (nmp->nm_rexmit || nmp->nm_currexmit) {
    1243   1.1      cgd 		nmp->nm_rexmit = nmp->nm_currexmit = 0;
    1244   1.1      cgd 		return;
    1245   1.1      cgd 	}
    1246   1.1      cgd 	/* If have a measurement, do smoothing */
    1247   1.1      cgd 	if (nmp->nm_srtt) {
    1248   1.1      cgd 		register short delta;
    1249   1.1      cgd 		delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
    1250   1.1      cgd 		if ((nmp->nm_srtt += delta) <= 0)
    1251   1.1      cgd 			nmp->nm_srtt = 1;
    1252   1.1      cgd 		if (delta < 0)
    1253   1.1      cgd 			delta = -delta;
    1254   1.1      cgd 		delta -= (nmp->nm_rttvar >> 2);
    1255   1.1      cgd 		if ((nmp->nm_rttvar += delta) <= 0)
    1256   1.1      cgd 			nmp->nm_rttvar = 1;
    1257   1.1      cgd 	/* Else initialize */
    1258   1.1      cgd 	} else {
    1259   1.1      cgd 		nmp->nm_rttvar = nmp->nm_rtt << 1;
    1260   1.1      cgd 		if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
    1261   1.1      cgd 		nmp->nm_srtt = nmp->nm_rttvar << 2;
    1262   1.1      cgd 	}
    1263   1.1      cgd 	/* Compute new Retransmission TimeOut and clip */
    1264   1.1      cgd 	nmp->nm_rto = NFS_RTO(nmp);
    1265   1.1      cgd 	if (nmp->nm_rto < NFS_MINTIMEO)
    1266   1.1      cgd 		nmp->nm_rto = NFS_MINTIMEO;
    1267   1.1      cgd 	else if (nmp->nm_rto > NFS_MAXTIMEO)
    1268   1.1      cgd 		nmp->nm_rto = NFS_MAXTIMEO;
    1269   1.1      cgd 
                        	/*
                        	 * Below nm_ssthresh the window grows by 4 per call.  Otherwise
                        	 * nm_winext counts calls, and the window grows by 1 (with
                        	 * nm_winext reset) once nm_winext squared reaches nm_window.
                        	 */
    1270   1.1      cgd 	/* Update window estimate */
    1271   1.1      cgd 	if (nmp->nm_window < nmp->nm_ssthresh)	/* quickly */
    1272   1.1      cgd 		nmp->nm_window += 4;
    1273   1.1      cgd 	else {						/* slowly */
    1274   1.1      cgd 		register long incr = ++nmp->nm_winext;
    1275   1.1      cgd 		incr = (incr * incr) / nmp->nm_window;
    1276   1.1      cgd 		if (incr > 0) {
    1277   1.1      cgd 			nmp->nm_winext = 0;
    1278   1.1      cgd 			++nmp->nm_window;
    1279   1.1      cgd 		}
    1280   1.1      cgd 	}
    1281   1.1      cgd 	if (nmp->nm_window > NFS_MAXWINDOW)
    1282   1.1      cgd 		nmp->nm_window = NFS_MAXWINDOW;
    1283   1.1      cgd }
    1284   1.1      cgd 
                        /*
                         * Back off the retransmit timer of a mount: bump nm_rexmit, set
                         * nm_rto to NFS_RTO shifted left by nm_rexmit - 1 (NFS_MAXTIMEO if
                         * that is zero or too large), record 3/4 of the current window in
                         * nm_ssthresh and reset nm_window to 1 and nm_winext to 0.
                         */
    1285   1.1      cgd nfs_backofftimer(nmp)
    1286   1.1      cgd 	register struct nfsmount *nmp;
    1287   1.1      cgd {
    1288   1.1      cgd 	register unsigned long newrto;
    1289   1.1      cgd 
    1290   1.1      cgd 	/* Clip shift count */
    1291   1.1      cgd 	if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
    1292   1.1      cgd 		nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
    1293   1.1      cgd 	/* Back off RTO exponentially */
    1294   1.1      cgd 	newrto = NFS_RTO(nmp);
    1295   1.1      cgd 	newrto <<= (nmp->nm_rexmit - 1);
    1296   1.1      cgd 	if (newrto == 0 || newrto > NFS_MAXTIMEO)
    1297   1.1      cgd 		newrto = NFS_MAXTIMEO;
    1298   1.1      cgd 	nmp->nm_rto = newrto;
    1299   1.1      cgd 
    1300   1.1      cgd 	/* If too many retries, message, assume a bogus RTT and re-measure */
                        	/*
                        	 * NOTE(review): no message is printed by the code below.  When
                        	 * nm_currexmit first equals nfsrexmtthresh, nm_srtt >> 2 is
                        	 * added to nm_rttvar and nm_srtt is zeroed.
                        	 */
    1301   1.1      cgd 	if (nmp->nm_currexmit < nmp->nm_rexmit) {
    1302   1.1      cgd 		nmp->nm_currexmit = nmp->nm_rexmit;
    1303   1.1      cgd 		if (nmp->nm_currexmit >= nfsrexmtthresh) {
    1304   1.1      cgd 			if (nmp->nm_currexmit == nfsrexmtthresh) {
    1305   1.1      cgd 				nmp->nm_rttvar += (nmp->nm_srtt >> 2);
    1306   1.1      cgd 				nmp->nm_srtt = 0;
    1307   1.1      cgd 			}
    1308   1.1      cgd 		}
    1309   1.1      cgd 	}
    1310   1.1      cgd 	/* Close down window but remember this point (3/4 current) for later */
    1311   1.1      cgd 	nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
    1312   1.1      cgd 	nmp->nm_window = 1;
    1313   1.1      cgd 	nmp->nm_winext = 0;
    1314   1.1      cgd }
   1315   1.1      cgd 
   1316   1.1      cgd /*
   1317   1.1      cgd  * Test for a termination signal pending on procp.
   1318   1.1      cgd  * This is used for NFSMNT_INT mounts.
   1319   1.1      cgd  */
   1320   1.1      cgd nfs_sigintr(p)
   1321   1.1      cgd 	register struct proc *p;
   1322   1.1      cgd {
   1323   1.1      cgd 	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
   1324   1.1      cgd 	    NFSINT_SIGMASK))
   1325   1.1      cgd 		return (1);
   1326   1.1      cgd 	else
   1327   1.1      cgd 		return (0);
   1328   1.1      cgd }
   1329   1.1      cgd 
   1330   1.1      cgd nfs_msg(p, server, msg)
   1331   1.1      cgd 	struct proc *p;
   1332   1.1      cgd 	char *server, *msg;
   1333   1.1      cgd {
   1334   1.1      cgd 	tpr_t tpr;
   1335   1.1      cgd 
   1336   1.1      cgd 	if (p)
   1337   1.1      cgd 		tpr = tprintf_open(p);
   1338   1.1      cgd 	else
   1339   1.1      cgd 		tpr = NULL;
   1340   1.1      cgd 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1341   1.1      cgd 	tprintf_close(tpr);
   1342   1.1      cgd }
   1343   1.1      cgd 
   1344   1.1      cgd /*
   1345   1.1      cgd  * Lock a socket against others.
   1346   1.1      cgd  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1347   1.1      cgd  * and also to avoid race conditions between the processes with nfs requests
   1348   1.1      cgd  * in progress when a reconnect is necessary.
   1349   1.1      cgd  */
   1350   1.1      cgd nfs_solock(flagp)
   1351   1.1      cgd 	register int *flagp;
   1352   1.1      cgd {
   1353   1.1      cgd 
   1354   1.1      cgd 	while (*flagp & NFSMNT_SCKLOCK) {
   1355   1.1      cgd 		*flagp |= NFSMNT_WANTSCK;
   1356   1.1      cgd 		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
   1357   1.1      cgd 	}
   1358   1.1      cgd 	*flagp |= NFSMNT_SCKLOCK;
   1359   1.1      cgd }
   1360   1.1      cgd 
   1361   1.1      cgd /*
   1362   1.1      cgd  * Unlock the stream socket for others.
   1363   1.1      cgd  */
   1364   1.1      cgd nfs_sounlock(flagp)
   1365   1.1      cgd 	register int *flagp;
   1366   1.1      cgd {
   1367   1.1      cgd 
   1368   1.1      cgd 	if ((*flagp & NFSMNT_SCKLOCK) == 0)
   1369   1.1      cgd 		panic("nfs sounlock");
   1370   1.1      cgd 	*flagp &= ~NFSMNT_SCKLOCK;
   1371   1.1      cgd 	if (*flagp & NFSMNT_WANTSCK) {
   1372   1.1      cgd 		*flagp &= ~NFSMNT_WANTSCK;
   1373   1.1      cgd 		wakeup((caddr_t)flagp);
   1374   1.1      cgd 	}
   1375   1.1      cgd }
   1376   1.1      cgd 
   1377   1.1      cgd /*
   1378   1.1      cgd  * This function compares two net addresses by family and returns TRUE
   1379   1.1      cgd  * if they are the same.
   1380   1.1      cgd  * If there is any doubt, return FALSE.
   1381   1.1      cgd  */
   1382   1.1      cgd nfs_netaddr_match(nam1, nam2)
   1383   1.1      cgd 	struct mbuf *nam1, *nam2;
   1384   1.1      cgd {
   1385   1.1      cgd 	register struct sockaddr *saddr1, *saddr2;
   1386   1.1      cgd 
   1387   1.1      cgd 	saddr1 = mtod(nam1, struct sockaddr *);
   1388   1.1      cgd 	saddr2 = mtod(nam2, struct sockaddr *);
   1389   1.1      cgd 	if (saddr1->sa_family != saddr2->sa_family)
   1390   1.1      cgd 		return (0);
   1391   1.1      cgd 
   1392   1.1      cgd 	/*
   1393   1.1      cgd 	 * Must do each address family separately since unused fields
   1394   1.1      cgd 	 * are undefined values and not always zeroed.
   1395   1.1      cgd 	 */
   1396   1.1      cgd 	switch (saddr1->sa_family) {
   1397   1.1      cgd 	case AF_INET:
   1398   1.1      cgd 		if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
   1399   1.1      cgd 		    ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
   1400   1.1      cgd 			return (1);
   1401   1.1      cgd 		break;
   1402   1.1      cgd 	default:
   1403   1.1      cgd 		break;
   1404   1.1      cgd 	};
   1405   1.1      cgd 	return (0);
   1406   1.1      cgd }
   1407   1.1      cgd 
   1408   1.1      cgd /*
   1409   1.1      cgd  * Check the hostname fields for nfsd's mask and match fields.
   1410   1.1      cgd  * By address family:
   1411   1.1      cgd  * - Bitwise AND the mask with the host address field
   1412   1.1      cgd  * - Compare for == with match
   1413   1.1      cgd  * return TRUE if not equal
   1414   1.1      cgd  */
   1415   1.1      cgd nfs_badnam(nam, msk, mtch)
   1416   1.1      cgd 	register struct mbuf *nam, *msk, *mtch;
   1417   1.1      cgd {
   1418   1.1      cgd 	switch (mtod(nam, struct sockaddr *)->sa_family) {
   1419   1.1      cgd 	case AF_INET:
   1420   1.1      cgd 		return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
   1421   1.1      cgd 			 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
   1422   1.1      cgd 			 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
   1423   1.1      cgd 	default:
   1424   1.1      cgd 		printf("nfs_badmatch, unknown sa_family\n");
   1425   1.1      cgd 		return (0);
   1426   1.1      cgd 	};
   1427   1.1      cgd }
   1428