Home | History | Annotate | Line # | Download | only in nfs
nfs_socket.c revision 1.33
      1  1.33      fvdl /*	$NetBSD: nfs_socket.c,v 1.33 1997/02/04 21:33:19 fvdl Exp $	*/
      2  1.15       cgd 
      3   1.1       cgd /*
      4  1.24      fvdl  * Copyright (c) 1989, 1991, 1993, 1995
      5  1.14   mycroft  *	The Regents of the University of California.  All rights reserved.
      6   1.1       cgd  *
      7   1.1       cgd  * This code is derived from software contributed to Berkeley by
      8   1.1       cgd  * Rick Macklem at The University of Guelph.
      9   1.1       cgd  *
     10   1.1       cgd  * Redistribution and use in source and binary forms, with or without
     11   1.1       cgd  * modification, are permitted provided that the following conditions
     12   1.1       cgd  * are met:
     13   1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     14   1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     15   1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     16   1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     17   1.1       cgd  *    documentation and/or other materials provided with the distribution.
     18   1.1       cgd  * 3. All advertising materials mentioning features or use of this software
     19   1.1       cgd  *    must display the following acknowledgement:
     20   1.1       cgd  *	This product includes software developed by the University of
     21   1.1       cgd  *	California, Berkeley and its contributors.
     22   1.1       cgd  * 4. Neither the name of the University nor the names of its contributors
     23   1.1       cgd  *    may be used to endorse or promote products derived from this software
     24   1.1       cgd  *    without specific prior written permission.
     25   1.1       cgd  *
     26   1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27   1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28   1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29   1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30   1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31   1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32   1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33   1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34   1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35   1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36   1.1       cgd  * SUCH DAMAGE.
     37   1.1       cgd  *
     38  1.24      fvdl  *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
     39   1.1       cgd  */
     40   1.1       cgd 
     41   1.1       cgd /*
     42   1.1       cgd  * Socket operations for use by nfs
     43   1.1       cgd  */
     44   1.1       cgd 
     45   1.9   mycroft #include <sys/param.h>
     46   1.9   mycroft #include <sys/systm.h>
     47   1.9   mycroft #include <sys/proc.h>
     48   1.9   mycroft #include <sys/mount.h>
     49   1.9   mycroft #include <sys/kernel.h>
     50   1.9   mycroft #include <sys/mbuf.h>
     51   1.9   mycroft #include <sys/vnode.h>
     52   1.9   mycroft #include <sys/domain.h>
     53   1.9   mycroft #include <sys/protosw.h>
     54   1.9   mycroft #include <sys/socket.h>
     55   1.9   mycroft #include <sys/socketvar.h>
     56   1.9   mycroft #include <sys/syslog.h>
     57   1.9   mycroft #include <sys/tprintf.h>
     58  1.23  christos #include <sys/namei.h>
     59   1.1       cgd 
     60   1.9   mycroft #include <netinet/in.h>
     61   1.9   mycroft #include <netinet/tcp.h>
     62  1.24      fvdl 
     63   1.9   mycroft #include <nfs/rpcv2.h>
     64  1.24      fvdl #include <nfs/nfsproto.h>
     65   1.9   mycroft #include <nfs/nfs.h>
     66   1.9   mycroft #include <nfs/xdr_subs.h>
     67   1.9   mycroft #include <nfs/nfsm_subs.h>
     68   1.9   mycroft #include <nfs/nfsmount.h>
     69  1.14   mycroft #include <nfs/nfsnode.h>
     70  1.14   mycroft #include <nfs/nfsrtt.h>
     71  1.14   mycroft #include <nfs/nqnfs.h>
     72  1.23  christos #include <nfs/nfs_var.h>
     73   1.1       cgd 
     74   1.1       cgd #define	TRUE	1	/* boolean true */
     75   1.1       cgd #define	FALSE	0	/* boolean false */
     76   1.1       cgd 
     77   1.1       cgd /*
     78  1.14   mycroft  * Estimate rto for an nfs rpc sent via an unreliable datagram.
     79  1.14   mycroft  * Use the mean and mean deviation of rtt for the appropriate type of rpc
     80  1.14   mycroft  * for the frequent rpcs and a default for the others.
     81  1.14   mycroft  * The justification for doing "other" this way is that these rpcs
     82  1.14   mycroft  * happen so infrequently that timer est. would probably be stale.
     83  1.14   mycroft  * Also, since many of these rpcs are
     84  1.14   mycroft  * non-idempotent, a conservative timeout is desired.
     85  1.14   mycroft  * getattr, lookup - A+2D
     86  1.14   mycroft  * read, write     - A+4D
     87  1.14   mycroft  * other           - nm_timeo
                         *
                         * NFS_RTO(n, t):
                         *   n - pointer to the mount structure holding nm_timeo, nm_srtt[]
                         *       and nm_sdrtt[].
                         *   t - timer number (0 selects nm_timeo; otherwise slot t - 1 of
                         *       nm_srtt[] / nm_sdrtt[] is used, timers 1 and 2 taking the
                         *       first formula and larger values the second).
                         * The shifts by 3 and 2 suggest that nm_srtt[] is kept scaled by 8
                         * (nfs_connect() seeds it with NFS_TIMEO << 3) and nm_sdrtt[] by 4
                         * -- TODO confirm against the code that updates them.
                         * NOTE(review): t is used as "t-1" without parentheses in the
                         * expansion, so callers should pass a simple expression.
     88  1.14   mycroft  */
     89  1.14   mycroft #define	NFS_RTO(n, t) \
     90  1.14   mycroft 	((t) == 0 ? (n)->nm_timeo : \
     91  1.14   mycroft 	 ((t) < 3 ? \
     92  1.14   mycroft 	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
     93  1.14   mycroft 	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
                        /*
                         * Smoothed rtt and rtt deviation slots for request r, selected through
                         * proct[] by the request's procedure number.  The index is
                         * proct[...] - 1, so these are only meaningful for procedures whose
                         * proct[] entry is non-zero.
                         */
     94  1.14   mycroft #define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
     95  1.14   mycroft #define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
     96  1.14   mycroft /*
     97   1.1       cgd  * External data, mostly RPC constants in XDR form
                         * Everything below is only declared here; the definitions live in
                         * other files.  Of these, the code visible in this part of the file
                         * uses nfsstats (nfs_receive() bumps nfsstats.rpcretries on each
                         * resend).
     98   1.1       cgd  */
     99  1.22       cgd extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
    100  1.24      fvdl 	rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
    101  1.14   mycroft 	rpc_auth_kerb;
    102  1.24      fvdl extern u_int32_t nfs_prog, nqnfs_prog;
    103  1.14   mycroft extern time_t nqnfsstarttime;
    104  1.24      fvdl extern struct nfsstats nfsstats;
    105  1.24      fvdl extern int nfsv3_procid[NFS_NPROCS];
    106  1.24      fvdl extern int nfs_ticks;
    107  1.14   mycroft 
    108  1.14   mycroft /*
    109  1.14   mycroft  * Defines which timer to use for the procnum.
    110  1.14   mycroft  * 0 - default
    111  1.14   mycroft  * 1 - getattr
    112  1.14   mycroft  * 2 - lookup
    113  1.14   mycroft  * 3 - read
    114  1.14   mycroft  * 4 - write
                         *
                         * The table is indexed by RPC procedure number (NFS_SRTT() and
                         * NFS_SDRTT() index it with r_procnum) and has NFS_NPROCS entries.
                         * A non-zero entry minus one is the slot used in nm_srtt[] and
                         * nm_sdrtt[]; a zero entry means no per-procedure estimate is kept.
    115  1.14   mycroft  */
    116  1.14   mycroft static int proct[NFS_NPROCS] = {
    117  1.24      fvdl 	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
    118  1.24      fvdl 	0, 0, 0,
    119   1.1       cgd };
    120  1.14   mycroft 
    121  1.14   mycroft /*
    122  1.14   mycroft  * There is a congestion window for outstanding rpcs maintained per mount
    123  1.14   mycroft  * point. The cwnd size is adjusted in roughly the way that:
    124  1.14   mycroft  * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
    125  1.14   mycroft  * SIGCOMM '88". ACM, August 1988.
    126  1.14   mycroft  * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
    127  1.14   mycroft  * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
    128  1.14   mycroft  * of rpcs is in progress.
    129  1.14   mycroft  * (The sent count and cwnd are scaled for integer arith.)
    130  1.14   mycroft  * Variants of "slow start" were tried and were found to be too much of a
    131  1.14   mycroft  * performance hit (ave. rtt 3 times larger),
    132  1.14   mycroft  * I suspect due to the large rtt that nfs rpcs have.
                         *
                         * NFS_CWNDSCALE is the fixed-point scale factor, so NFS_MAXCWND is a
                         * window of 32 in scaled units.  nfs_connect() starts every connection
                         * at NFS_MAXCWND / 2.
                         * nfs_backoff[], nfsrtton and nfsrtt are not referenced in this part of
                         * the file; presumably they hold the retransmit backoff multipliers and
                         * the optional rtt logging state -- confirm at their users.
    133  1.14   mycroft  */
    134  1.14   mycroft #define	NFS_CWNDSCALE	256
    135  1.14   mycroft #define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
    136  1.14   mycroft static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
    137  1.14   mycroft int nfsrtton = 0;
    138  1.14   mycroft struct nfsrtt nfsrtt;
    139   1.1       cgd 
    140   1.1       cgd /*
    141   1.1       cgd  * Initialize sockets and congestion for a new NFS connection.
    142   1.1       cgd  * We do not free the sockaddr if error.
                         *
                         * nmp - mount whose nm_nam holds the server address.  On success
                         *       nm_so is the new socket and nm_soflags its protocol flags;
                         *       the rtt estimates, congestion window, sent count and timeout
                         *       count are reset.
                         * rep - request on whose behalf we are connecting, or NULL.  When it
                         *       is non-NULL, nfs_sigintr() is polled while waiting for the
                         *       connection to complete.
                         * Returns 0 on success.  On any failure the socket is torn down with
                         * nfs_disconnect() and the error is returned.
    143   1.1       cgd  */
    144  1.23  christos int
    145  1.14   mycroft nfs_connect(nmp, rep)
    146   1.1       cgd 	register struct nfsmount *nmp;
    147  1.14   mycroft 	struct nfsreq *rep;
    148   1.1       cgd {
    149   1.1       cgd 	register struct socket *so;
    150  1.14   mycroft 	int s, error, rcvreserve, sndreserve;
    151  1.11       cgd 	struct sockaddr *saddr;
    152  1.14   mycroft 	struct sockaddr_in *sin;
    153   1.1       cgd 	struct mbuf *m;
    154  1.22       cgd 	u_int16_t tport;
    155   1.1       cgd 
                        	/*
                        	 * Clear nm_so first: the bad: path calls nfs_disconnect(), which
                        	 * only acts when nm_so is non-NULL.
                        	 */
    156   1.1       cgd 	nmp->nm_so = (struct socket *)0;
    157  1.11       cgd 	saddr = mtod(nmp->nm_nam, struct sockaddr *);
    158  1.24      fvdl 	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
    159  1.24      fvdl 		nmp->nm_soproto);
    160  1.23  christos 	if (error)
    161   1.1       cgd 		goto bad;
    162   1.1       cgd 	so = nmp->nm_so;
    163   1.1       cgd 	nmp->nm_soflags = so->so_proto->pr_flags;
    164   1.1       cgd 
    165   1.2       cgd 	/*
    166   1.2       cgd 	 * Some servers require that the client port be a reserved port number.
    167   1.2       cgd 	 */
    168  1.14   mycroft 	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
    169   1.2       cgd 		MGET(m, M_WAIT, MT_SONAME);
    170   1.2       cgd 		sin = mtod(m, struct sockaddr_in *);
    171   1.2       cgd 		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
    172   1.2       cgd 		sin->sin_family = AF_INET;
    173   1.2       cgd 		sin->sin_addr.s_addr = INADDR_ANY;
    174   1.2       cgd 		tport = IPPORT_RESERVED - 1;
    175   1.2       cgd 		sin->sin_port = htons(tport);
                        		/*
                        		 * Walk downward from IPPORT_RESERVED - 1 for as long as
                        		 * the bind fails with EADDRINUSE, giving up once tport
                        		 * reaches IPPORT_RESERVED / 2.
                        		 */
    176  1.14   mycroft 		while ((error = sobind(so, m)) == EADDRINUSE &&
    177   1.2       cgd 		       --tport > IPPORT_RESERVED / 2)
    178   1.2       cgd 			sin->sin_port = htons(tport);
    179   1.2       cgd 		m_freem(m);
    180  1.14   mycroft 		if (error)
    181  1.14   mycroft 			goto bad;
    182   1.2       cgd 	}
    183   1.2       cgd 
    184   1.1       cgd 	/*
    185   1.1       cgd 	 * Protocols that do not require connections may be optionally left
    186   1.1       cgd 	 * unconnected for servers that reply from a port other than NFS_PORT.
    187   1.1       cgd 	 */
    188   1.1       cgd 	if (nmp->nm_flag & NFSMNT_NOCONN) {
    189   1.1       cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED) {
    190   1.1       cgd 			error = ENOTCONN;
    191   1.1       cgd 			goto bad;
    192   1.1       cgd 		}
    193   1.1       cgd 	} else {
    194  1.24      fvdl 		error = soconnect(so, nmp->nm_nam);
    195  1.24      fvdl 		if (error)
    196   1.1       cgd 			goto bad;
    197   1.1       cgd 
    198   1.1       cgd 		/*
    199   1.1       cgd 		 * Wait for the connection to complete. Cribbed from the
    200  1.14   mycroft 		 * connect system call but with the wait timing out so
    201  1.14   mycroft 		 * that interruptible mounts don't hang here for a long time.
    202   1.1       cgd 		 */
    203  1.21   mycroft 		s = splsoftnet();
    204  1.14   mycroft 		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
    205  1.14   mycroft 			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
    206  1.14   mycroft 				"nfscon", 2 * hz);
                        			/*
                        			 * Still connecting after the sleep: if there is a
                        			 * request and nfs_sigintr() reports an error for it,
                        			 * abandon the connect attempt.
                        			 */
    207  1.14   mycroft 			if ((so->so_state & SS_ISCONNECTING) &&
    208  1.14   mycroft 			    so->so_error == 0 && rep &&
    209  1.24      fvdl 			    (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
    210  1.14   mycroft 				so->so_state &= ~SS_ISCONNECTING;
    211  1.14   mycroft 				splx(s);
    212  1.14   mycroft 				goto bad;
    213  1.14   mycroft 			}
    214  1.14   mycroft 		}
                        		/* Report and clear any error left on the socket. */
    215   1.1       cgd 		if (so->so_error) {
    216   1.1       cgd 			error = so->so_error;
    217  1.14   mycroft 			so->so_error = 0;
    218  1.14   mycroft 			splx(s);
    219   1.1       cgd 			goto bad;
    220   1.1       cgd 		}
    221  1.14   mycroft 		splx(s);
    222  1.14   mycroft 	}
                        	/*
                        	 * Soft or interruptible mounts get a 5 * hz timeout on both
                        	 * socket buffers; other mounts get a timeout of 0.
                        	 */
    223  1.14   mycroft 	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
    224  1.14   mycroft 		so->so_rcv.sb_timeo = (5 * hz);
    225  1.14   mycroft 		so->so_snd.sb_timeo = (5 * hz);
    226  1.14   mycroft 	} else {
    227  1.14   mycroft 		so->so_rcv.sb_timeo = 0;
    228  1.14   mycroft 		so->so_snd.sb_timeo = 0;
    229   1.1       cgd 	}
                        	/*
                        	 * Size the socket buffers from the mount's transfer sizes plus
                        	 * NFS_MAXPKTHDR; the non-datagram types reserve twice that.
                        	 */
    230   1.1       cgd 	if (nmp->nm_sotype == SOCK_DGRAM) {
    231  1.26      fvdl 		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
    232  1.25      fvdl 		rcvreserve = max(nmp->nm_rsize, nmp->nm_readdirsize) +
    233  1.25      fvdl 		    NFS_MAXPKTHDR;
    234  1.14   mycroft 	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
    235  1.26      fvdl 		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
    236  1.26      fvdl 		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
    237  1.25      fvdl 		    NFS_MAXPKTHDR) * 2;
    238   1.1       cgd 	} else {
    239  1.14   mycroft 		if (nmp->nm_sotype != SOCK_STREAM)
    240  1.14   mycroft 			panic("nfscon sotype");
                        		/*
                        		 * NOTE(review): the return values of both sosetopt() calls
                        		 * below are ignored -- confirm this is intended best-effort.
                        		 */
    241   1.1       cgd 		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
    242   1.1       cgd 			MGET(m, M_WAIT, MT_SOOPTS);
    243  1.22       cgd 			*mtod(m, int32_t *) = 1;
    244  1.22       cgd 			m->m_len = sizeof(int32_t);
    245   1.1       cgd 			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
    246   1.1       cgd 		}
    247  1.14   mycroft 		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
    248   1.1       cgd 			MGET(m, M_WAIT, MT_SOOPTS);
    249  1.22       cgd 			*mtod(m, int32_t *) = 1;
    250  1.22       cgd 			m->m_len = sizeof(int32_t);
    251   1.1       cgd 			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
    252   1.1       cgd 		}
                        		/*
                        		 * The extra sizeof (u_int32_t) matches the 4-byte length
                        		 * word that nfs_receive() reads ahead of each stream record.
                        		 * NOTE(review): unlike the two branches above, rcvreserve
                        		 * here uses nm_rsize alone rather than the larger of
                        		 * nm_rsize and nm_readdirsize -- confirm this is intended.
                        		 */
    253  1.22       cgd 		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
    254  1.22       cgd 		    sizeof (u_int32_t)) * 2;
    255  1.22       cgd 		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
    256  1.22       cgd 		    sizeof (u_int32_t)) * 2;
    257   1.1       cgd 	}
    258  1.24      fvdl 	error = soreserve(so, sndreserve, rcvreserve);
    259  1.24      fvdl 	if (error)
    260  1.14   mycroft 		goto bad;
    261   1.1       cgd 	so->so_rcv.sb_flags |= SB_NOINTR;
    262   1.1       cgd 	so->so_snd.sb_flags |= SB_NOINTR;
    263   1.1       cgd 
    264   1.1       cgd 	/* Initialize other non-zero congestion variables */
    265  1.14   mycroft 	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
    266  1.14   mycroft 		nmp->nm_srtt[4] = (NFS_TIMEO << 3);
    267  1.14   mycroft 	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
    268  1.14   mycroft 		nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
    269  1.14   mycroft 	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
    270   1.1       cgd 	nmp->nm_sent = 0;
    271  1.14   mycroft 	nmp->nm_timeouts = 0;
    272   1.1       cgd 	return (0);
    273   1.1       cgd 
                        	/* Common failure exit: drop whatever socket exists. */
    274   1.1       cgd bad:
    275   1.1       cgd 	nfs_disconnect(nmp);
    276   1.1       cgd 	return (error);
    277   1.1       cgd }
    278   1.1       cgd 
    279   1.1       cgd /*
    280   1.1       cgd  * Reconnect routine:
    281   1.1       cgd  * Called when a connection is broken on a reliable protocol.
    282   1.1       cgd  * - clean up the old socket
    283   1.1       cgd  * - nfs_connect() again
    284   1.1       cgd  * - set R_MUSTRESEND for all outstanding requests on mount point
    285   1.1       cgd  * If this fails the mount point is DEAD!
    286  1.14   mycroft  * nb: Must be called with the nfs_sndlock() set on the mount point.
                         *
                         * rep - request whose mount (rep->r_nmp) is to be reconnected; it is
                         *       also handed to nfs_connect().
                         * Returns 0 once a connection has been made, or EINTR when
                         * nfs_connect() fails with EINTR or ERESTART.  Any other connect
                         * failure is retried indefinitely.
    287   1.1       cgd  */
    288  1.23  christos int
    289  1.14   mycroft nfs_reconnect(rep)
    290   1.1       cgd 	register struct nfsreq *rep;
    291   1.1       cgd {
    292   1.1       cgd 	register struct nfsreq *rp;
    293  1.14   mycroft 	register struct nfsmount *nmp = rep->r_nmp;
    294   1.1       cgd 	int error;
    295   1.1       cgd 
    296  1.14   mycroft 	nfs_disconnect(nmp);
                        	/*
                        	 * Keep trying until the connect succeeds, sleeping on lbolt
                        	 * between attempts.
                        	 */
    297  1.23  christos 	while ((error = nfs_connect(nmp, rep)) != 0) {
    298  1.14   mycroft 		if (error == EINTR || error == ERESTART)
    299   1.1       cgd 			return (EINTR);
    300   1.1       cgd 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
    301   1.1       cgd 	}
    302   1.1       cgd 
    303   1.1       cgd 	/*
    304   1.1       cgd 	 * Loop through outstanding request list and fix up all requests
    305   1.1       cgd 	 * on old socket.
    306   1.1       cgd 	 */
    307  1.16   mycroft 	for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
    308   1.1       cgd 		if (rp->r_nmp == nmp)
    309   1.1       cgd 			rp->r_flags |= R_MUSTRESEND;
    310   1.1       cgd 	}
    311   1.1       cgd 	return (0);
    312   1.1       cgd }
    313   1.1       cgd 
    314   1.1       cgd /*
    315   1.1       cgd  * NFS disconnect. Clean up and unlink.
                         *
                         * nmp - mount whose socket is to be dropped.  Does nothing when
                         *       nm_so is already NULL, so it is safe to call repeatedly.
    316   1.1       cgd  */
    317   1.1       cgd void
    318   1.1       cgd nfs_disconnect(nmp)
    319   1.1       cgd 	register struct nfsmount *nmp;
    320   1.1       cgd {
    321   1.1       cgd 	register struct socket *so;
    322   1.1       cgd 
    323   1.1       cgd 	if (nmp->nm_so) {
                        		/*
                        		 * Detach the socket from the mount before shutting it
                        		 * down, so nm_so is already NULL while the close runs.
                        		 */
    324   1.1       cgd 		so = nmp->nm_so;
    325   1.1       cgd 		nmp->nm_so = (struct socket *)0;
                        		/* how == 2: presumably shuts down both directions. */
    326   1.1       cgd 		soshutdown(so, 2);
    327   1.1       cgd 		soclose(so);
    328   1.1       cgd 	}
    329   1.1       cgd }
    330   1.1       cgd 
    331   1.1       cgd /*
    332   1.1       cgd  * This is the nfs send routine. For connection based socket types, it
    333  1.14   mycroft  * must be called with an nfs_sndlock() on the socket.
    334   1.1       cgd  * "rep == NULL" indicates that it has been called from a server.
    335  1.14   mycroft  * For the client side:
    336  1.14   mycroft  * - return EINTR if the RPC is terminated, 0 otherwise
    337  1.14   mycroft  * - set R_MUSTRESEND if the send fails for any reason
    338  1.14   mycroft  * - do any cleanup required by recoverable socket errors (???)
    339  1.14   mycroft  * For the server side:
    340  1.14   mycroft  * - return EINTR or ERESTART if interrupted by a signal
    341  1.14   mycroft  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
    342  1.14   mycroft  * - do any cleanup required by recoverable socket errors (???)
                         *
                         * so  - socket to send on.  Ignored when rep is non-NULL: the socket
                         *       is then taken from rep->r_nmp->nm_so.
                         * nam - destination address; only passed to sosend() when the
                         *       protocol is not connection based and the socket is not
                         *       connected.
                         * top - mbuf chain to send.  It is freed here on the early-return
                         *       paths and otherwise handed to sosend() (which presumably
                         *       takes ownership -- confirm).
                         * rep - client request, or NULL on the server side.
                         * Only EINTR, ERESTART, EWOULDBLOCK and EPIPE are ever returned;
                         * every other send error is logged and reported as 0.
    343   1.1       cgd  */
    344  1.23  christos int
    345   1.1       cgd nfs_send(so, nam, top, rep)
    346   1.1       cgd 	register struct socket *so;
    347   1.1       cgd 	struct mbuf *nam;
    348   1.1       cgd 	register struct mbuf *top;
    349   1.1       cgd 	struct nfsreq *rep;
    350   1.1       cgd {
    351   1.1       cgd 	struct mbuf *sendnam;
    352  1.14   mycroft 	int error, soflags, flags;
    353   1.1       cgd 
    354   1.1       cgd 	if (rep) {
                        		/* Request already soft-terminated: drop the data. */
    355   1.1       cgd 		if (rep->r_flags & R_SOFTTERM) {
    356   1.1       cgd 			m_freem(top);
    357   1.1       cgd 			return (EINTR);
    358   1.1       cgd 		}
                        		/*
                        		 * No socket on the mount right now: mark the request
                        		 * for resending and report success.
                        		 */
    359  1.14   mycroft 		if ((so = rep->r_nmp->nm_so) == NULL) {
    360  1.14   mycroft 			rep->r_flags |= R_MUSTRESEND;
    361  1.14   mycroft 			m_freem(top);
    362  1.14   mycroft 			return (0);
    363  1.14   mycroft 		}
    364   1.1       cgd 		rep->r_flags &= ~R_MUSTRESEND;
    365   1.1       cgd 		soflags = rep->r_nmp->nm_soflags;
    366   1.1       cgd 	} else
    367   1.1       cgd 		soflags = so->so_proto->pr_flags;
                        	/* Only supply a destination address when the socket needs one. */
    368   1.1       cgd 	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
    369   1.1       cgd 		sendnam = (struct mbuf *)0;
    370   1.1       cgd 	else
    371   1.1       cgd 		sendnam = nam;
                        	/* Sequenced-packet sockets get MSG_EOR on every send. */
    372  1.14   mycroft 	if (so->so_type == SOCK_SEQPACKET)
    373  1.14   mycroft 		flags = MSG_EOR;
    374  1.14   mycroft 	else
    375  1.14   mycroft 		flags = 0;
    376   1.1       cgd 
    377   1.1       cgd 	error = sosend(so, sendnam, (struct uio *)0, top,
    378  1.14   mycroft 		(struct mbuf *)0, flags);
    379  1.14   mycroft 	if (error) {
    380  1.14   mycroft 		if (rep) {
    381  1.14   mycroft 			log(LOG_INFO, "nfs send error %d for server %s\n",error,
    382  1.14   mycroft 			    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    383  1.14   mycroft 			/*
    384  1.14   mycroft 			 * Deal with errors for the client side.
    385  1.14   mycroft 			 */
    386  1.14   mycroft 			if (rep->r_flags & R_SOFTTERM)
    387  1.14   mycroft 				error = EINTR;
    388  1.14   mycroft 			else
    389  1.14   mycroft 				rep->r_flags |= R_MUSTRESEND;
    390  1.14   mycroft 		} else
    391  1.14   mycroft 			log(LOG_INFO, "nfsd send error %d\n", error);
    392  1.14   mycroft 
    393  1.14   mycroft 		/*
    394  1.14   mycroft 		 * Handle any recoverable (soft) socket errors here. (???)
    395  1.14   mycroft 		 */
                        		/*
                        		 * Anything other than these four errors is swallowed; for
                        		 * a client request R_MUSTRESEND was already set above.
                        		 */
    396  1.14   mycroft 		if (error != EINTR && error != ERESTART &&
    397  1.14   mycroft 			error != EWOULDBLOCK && error != EPIPE)
    398   1.1       cgd 			error = 0;
    399   1.1       cgd 	}
    400   1.1       cgd 	return (error);
    401   1.1       cgd }
    402   1.1       cgd 
    403  1.32   thorpej #ifdef NFS
    404   1.1       cgd /*
    405   1.1       cgd  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
    406   1.1       cgd  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
    407   1.1       cgd  * Mark and consolidate the data into a new mbuf list.
    408   1.1       cgd  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
    409   1.1       cgd  *     small mbufs.
    410   1.1       cgd  * For SOCK_STREAM we must be very careful to read an entire record once
    411   1.1       cgd  * we have read any of it, even if the system call has been interrupted.
                         *
                         * rep   - request being waited for.  It is dereferenced
                         *         unconditionally, so it must not be NULL even though some
                         *         inner checks still test it.
                         * aname - set to NULL on entry; for datagram sockets that are not
                         *         connected it is passed to soreceive() to collect the
                         *         sender address.
                         * mp    - set to NULL on entry; receives the mbuf chain that was
                         *         read, and is reset to NULL when an error is returned from
                         *         the bottom of the function.
                         * Returns 0 or an error.  EINTR is returned when the request has
                         * R_SOFTTERM set or, on non-datagram sockets, when rep->r_mrep is
                         * already set; EACCES when a datagram mount has no socket.
    412   1.1       cgd  */
    413  1.23  christos int
    414  1.14   mycroft nfs_receive(rep, aname, mp)
    415  1.14   mycroft 	register struct nfsreq *rep;
    416   1.1       cgd 	struct mbuf **aname;
    417   1.1       cgd 	struct mbuf **mp;
    418   1.1       cgd {
    419  1.14   mycroft 	register struct socket *so;
    420   1.1       cgd 	struct uio auio;
    421   1.1       cgd 	struct iovec aio;
    422   1.1       cgd 	register struct mbuf *m;
    423  1.14   mycroft 	struct mbuf *control;
    424  1.22       cgd 	u_int32_t len;
    425   1.1       cgd 	struct mbuf **getnam;
    426  1.14   mycroft 	int error, sotype, rcvflg;
    427  1.14   mycroft 	struct proc *p = curproc;	/* XXX */
    428   1.1       cgd 
    429   1.1       cgd 	/*
    430   1.1       cgd 	 * Set up arguments for soreceive()
    431   1.1       cgd 	 */
    432   1.1       cgd 	*mp = (struct mbuf *)0;
    433   1.1       cgd 	*aname = (struct mbuf *)0;
    434  1.14   mycroft 	sotype = rep->r_nmp->nm_sotype;
    435   1.1       cgd 
    436   1.1       cgd 	/*
    437   1.1       cgd 	 * For reliable protocols, lock against other senders/receivers
    438   1.1       cgd 	 * in case a reconnect is necessary.
    439   1.1       cgd 	 * For SOCK_STREAM, first get the Record Mark to find out how much
    440   1.1       cgd 	 * more there is to get.
    441   1.1       cgd 	 * We must lock the socket against other receivers
    442   1.1       cgd 	 * until we have an entire rpc request/reply.
    443   1.1       cgd 	 */
    444  1.14   mycroft 	if (sotype != SOCK_DGRAM) {
    445  1.24      fvdl 		error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
    446  1.24      fvdl 		if (error)
    447  1.14   mycroft 			return (error);
                        		/*
                        		 * Every path that reaches tryagain holds the send lock;
                        		 * it is released before any data is read below.
                        		 */
    448   1.1       cgd tryagain:
    449   1.1       cgd 		/*
    450   1.1       cgd 		 * Check for fatal errors and resending request.
    451   1.1       cgd 		 */
    452  1.14   mycroft 		/*
    453  1.14   mycroft 		 * Ugh: If a reconnect attempt just happened, nm_so
    454  1.14   mycroft 		 * would have changed. NULL indicates a failed
    455  1.14   mycroft 		 * attempt that has essentially shut down this
    456  1.14   mycroft 		 * mount point.
    457  1.14   mycroft 		 */
    458  1.14   mycroft 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
    459  1.14   mycroft 			nfs_sndunlock(&rep->r_nmp->nm_flag);
    460  1.14   mycroft 			return (EINTR);
    461  1.14   mycroft 		}
    462  1.24      fvdl 		so = rep->r_nmp->nm_so;
    463  1.24      fvdl 		if (!so) {
    464  1.24      fvdl 			error = nfs_reconnect(rep);
    465  1.24      fvdl 			if (error) {
    466  1.14   mycroft 				nfs_sndunlock(&rep->r_nmp->nm_flag);
    467  1.14   mycroft 				return (error);
    468  1.14   mycroft 			}
    469  1.14   mycroft 			goto tryagain;
    470  1.14   mycroft 		}
                        		/*
                        		 * Resend a copy of the original request for as long as it
                        		 * is flagged R_MUSTRESEND.  A send that fails with anything
                        		 * other than EINTR or ERESTART triggers a reconnect and a
                        		 * restart from tryagain.
                        		 */
    471  1.14   mycroft 		while (rep->r_flags & R_MUSTRESEND) {
    472  1.14   mycroft 			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
    473  1.14   mycroft 			nfsstats.rpcretries++;
    474  1.23  christos 			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
    475  1.23  christos 			if (error) {
    476  1.14   mycroft 				if (error == EINTR || error == ERESTART ||
    477  1.24      fvdl 				    (error = nfs_reconnect(rep)) != 0) {
    478  1.14   mycroft 					nfs_sndunlock(&rep->r_nmp->nm_flag);
    479  1.14   mycroft 					return (error);
    480  1.14   mycroft 				}
    481  1.14   mycroft 				goto tryagain;
    482   1.1       cgd 			}
    483   1.1       cgd 		}
    484  1.14   mycroft 		nfs_sndunlock(&rep->r_nmp->nm_flag);
    485  1.14   mycroft 		if (sotype == SOCK_STREAM) {
                        			/* First read the 4-byte length word into len. */
    486   1.1       cgd 			aio.iov_base = (caddr_t) &len;
    487  1.22       cgd 			aio.iov_len = sizeof(u_int32_t);
    488   1.1       cgd 			auio.uio_iov = &aio;
    489   1.1       cgd 			auio.uio_iovcnt = 1;
    490   1.1       cgd 			auio.uio_segflg = UIO_SYSSPACE;
    491   1.1       cgd 			auio.uio_rw = UIO_READ;
    492   1.1       cgd 			auio.uio_offset = 0;
    493  1.22       cgd 			auio.uio_resid = sizeof(u_int32_t);
    494  1.14   mycroft 			auio.uio_procp = p;
    495   1.1       cgd 			do {
    496  1.14   mycroft 			   rcvflg = MSG_WAITALL;
    497  1.14   mycroft 			   error = soreceive(so, (struct mbuf **)0, &auio,
    498   1.1       cgd 				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
    499  1.14   mycroft 			   if (error == EWOULDBLOCK && rep) {
    500   1.1       cgd 				if (rep->r_flags & R_SOFTTERM)
    501   1.1       cgd 					return (EINTR);
    502  1.14   mycroft 			   }
    503   1.1       cgd 			} while (error == EWOULDBLOCK);
                        			/*
                        			 * NOTE(review): the two size arguments below are
                        			 * sizeof-based expressions printed with %d -- confirm
                        			 * the types match on all supported platforms.
                        			 */
    504   1.1       cgd 			if (!error && auio.uio_resid > 0) {
    505  1.14   mycroft 			    log(LOG_INFO,
    506  1.14   mycroft 				 "short receive (%d/%d) from nfs server %s\n",
    507  1.22       cgd 				 sizeof(u_int32_t) - auio.uio_resid,
    508  1.22       cgd 				 sizeof(u_int32_t),
    509   1.1       cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    510   1.1       cgd 			    error = EPIPE;
    511   1.1       cgd 			}
    512   1.1       cgd 			if (error)
    513   1.1       cgd 				goto errout;
                        			/*
                        			 * Convert to host order and strip the top bit
                        			 * (presumably the record-mark last-fragment flag).
                        			 */
    514   1.1       cgd 			len = ntohl(len) & ~0x80000000;
    515   1.1       cgd 			/*
    516   1.1       cgd 			 * This is SERIOUS! We are out of sync with the sender
    517   1.1       cgd 			 * and forcing a disconnect/reconnect is all I can do.
    518   1.1       cgd 			 */
    519   1.1       cgd 			if (len > NFS_MAXPACKET) {
    520  1.14   mycroft 			    log(LOG_ERR, "%s (%d) from nfs server %s\n",
    521  1.14   mycroft 				"impossible packet length",
    522  1.14   mycroft 				len,
    523  1.14   mycroft 				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    524   1.1       cgd 			    error = EFBIG;
    525   1.1       cgd 			    goto errout;
    526   1.1       cgd 			}
                        			/*
                        			 * Now read the record body.  Unlike the length read,
                        			 * this loop also retries on EINTR and ERESTART so that
                        			 * a record is never left half read.
                        			 */
    527   1.1       cgd 			auio.uio_resid = len;
    528   1.1       cgd 			do {
    529   1.1       cgd 			    rcvflg = MSG_WAITALL;
    530   1.1       cgd 			    error =  soreceive(so, (struct mbuf **)0,
    531   1.1       cgd 				&auio, mp, (struct mbuf **)0, &rcvflg);
    532   1.1       cgd 			} while (error == EWOULDBLOCK || error == EINTR ||
    533   1.1       cgd 				 error == ERESTART);
    534   1.1       cgd 			if (!error && auio.uio_resid > 0) {
    535  1.14   mycroft 			    log(LOG_INFO,
    536  1.14   mycroft 				"short receive (%d/%d) from nfs server %s\n",
    537  1.14   mycroft 				len - auio.uio_resid, len,
    538  1.14   mycroft 				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    539   1.1       cgd 			    error = EPIPE;
    540   1.1       cgd 			}
    541   1.1       cgd 		} else {
    542  1.14   mycroft 			/*
    543  1.14   mycroft 			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
    544  1.14   mycroft 			 * and soreceive() will return when it has either a
    545  1.14   mycroft 			 * control msg or a data msg.
    546  1.14   mycroft 			 * We have no use for control msg., but must grab them
    547  1.14   mycroft 			 * and then throw them away so we know what is going
    548  1.14   mycroft 			 * on.
    549  1.14   mycroft 			 */
    550  1.14   mycroft 			auio.uio_resid = len = 100000000; /* Anything Big */
    551  1.14   mycroft 			auio.uio_procp = p;
    552   1.1       cgd 			do {
    553   1.1       cgd 			    rcvflg = 0;
    554   1.1       cgd 			    error =  soreceive(so, (struct mbuf **)0,
    555  1.14   mycroft 				&auio, mp, &control, &rcvflg);
    556  1.14   mycroft 			    if (control)
    557  1.14   mycroft 				m_freem(control);
    558   1.1       cgd 			    if (error == EWOULDBLOCK && rep) {
    559   1.1       cgd 				if (rep->r_flags & R_SOFTTERM)
    560   1.1       cgd 					return (EINTR);
    561   1.1       cgd 			    }
    562  1.14   mycroft 			} while (error == EWOULDBLOCK ||
    563  1.14   mycroft 				 (!error && *mp == NULL && control));
                        			/* Complain if the data did not end on a record boundary. */
    564  1.14   mycroft 			if ((rcvflg & MSG_EOR) == 0)
    565  1.31  christos 				printf("Egad!!\n");
    566   1.1       cgd 			if (!error && *mp == NULL)
    567   1.1       cgd 				error = EPIPE;
                        			/* len becomes the number of bytes actually received. */
    568   1.1       cgd 			len -= auio.uio_resid;
    569   1.1       cgd 		}
                        		/*
                        		 * Any error other than EINTR or ERESTART is treated as a
                        		 * broken connection: discard what was read, retake the send
                        		 * lock, reconnect and start over.
                        		 */
    570   1.1       cgd errout:
    571  1.14   mycroft 		if (error && error != EINTR && error != ERESTART) {
    572   1.1       cgd 			m_freem(*mp);
    573   1.1       cgd 			*mp = (struct mbuf *)0;
    574  1.14   mycroft 			if (error != EPIPE)
    575   1.1       cgd 				log(LOG_INFO,
    576   1.1       cgd 				    "receive error %d from nfs server %s\n",
    577   1.1       cgd 				    error,
    578   1.1       cgd 				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
    579  1.14   mycroft 			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
    580  1.14   mycroft 			if (!error)
    581  1.14   mycroft 				error = nfs_reconnect(rep);
    582   1.1       cgd 			if (!error)
    583   1.1       cgd 				goto tryagain;
    584   1.1       cgd 		}
    585   1.1       cgd 	} else {
                        		/* Datagram socket: no locking, a single soreceive() loop. */
    586  1.14   mycroft 		if ((so = rep->r_nmp->nm_so) == NULL)
    587  1.14   mycroft 			return (EACCES);
                        		/* Only ask for the sender address on an unconnected socket. */
    588   1.1       cgd 		if (so->so_state & SS_ISCONNECTED)
    589   1.1       cgd 			getnam = (struct mbuf **)0;
    590   1.1       cgd 		else
    591   1.1       cgd 			getnam = aname;
    592   1.1       cgd 		auio.uio_resid = len = 1000000;
    593  1.14   mycroft 		auio.uio_procp = p;
    594   1.1       cgd 		do {
    595   1.1       cgd 			rcvflg = 0;
    596   1.1       cgd 			error =  soreceive(so, getnam, &auio, mp,
    597   1.1       cgd 				(struct mbuf **)0, &rcvflg);
    598  1.14   mycroft 			if (error == EWOULDBLOCK &&
    599   1.1       cgd 			    (rep->r_flags & R_SOFTTERM))
    600   1.1       cgd 				return (EINTR);
    601   1.1       cgd 		} while (error == EWOULDBLOCK);
    602   1.1       cgd 		len -= auio.uio_resid;
    603   1.1       cgd 	}
    604   1.1       cgd 	if (error) {
    605   1.1       cgd 		m_freem(*mp);
    606   1.1       cgd 		*mp = (struct mbuf *)0;
    607   1.1       cgd 	}
    608   1.1       cgd 	/*
    609  1.14   mycroft 	 * Search for any mbufs that are not a multiple of 4 bytes long
    610  1.14   mycroft 	 * or with m_data not longword aligned.
    611   1.1       cgd 	 * These could cause pointer alignment problems, so copy them to
    612   1.1       cgd 	 * well aligned mbufs.
    613   1.1       cgd 	 */
                        	/*
                        	 * NOTE(review): this is reached with *mp == NULL on the error
                        	 * path -- confirm nfs_realign() accepts a NULL chain.
                        	 */
    614  1.14   mycroft 	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
    615   1.1       cgd 	return (error);
    616   1.1       cgd }
    617   1.1       cgd 
    618   1.1       cgd /*
    619   1.1       cgd  * Implement receipt of reply on a socket.
    620   1.1       cgd  * We must search through the list of received datagrams matching them
    621   1.1       cgd  * with outstanding requests using the xid, until ours is found.
    622   1.1       cgd  */
    623   1.1       cgd /* ARGSUSED */
    624  1.23  christos int
    625  1.14   mycroft nfs_reply(myrep)
    626   1.1       cgd 	struct nfsreq *myrep;
    627   1.1       cgd {
    628   1.1       cgd 	register struct nfsreq *rep;
    629  1.14   mycroft 	register struct nfsmount *nmp = myrep->r_nmp;
    630  1.22       cgd 	register int32_t t1;
    631  1.14   mycroft 	struct mbuf *mrep, *nam, *md;
    632  1.22       cgd 	u_int32_t rxid, *tl;
    633  1.14   mycroft 	caddr_t dpos, cp2;
    634  1.14   mycroft 	int error;
    635   1.1       cgd 
    636   1.1       cgd 	/*
    637   1.1       cgd 	 * Loop around until we get our own reply
    638   1.1       cgd 	 */
    639   1.1       cgd 	for (;;) {
    640   1.1       cgd 		/*
    641   1.1       cgd 		 * Lock against other receivers so that I don't get stuck in
    642   1.1       cgd 		 * sbwait() after someone else has received my reply for me.
    643   1.1       cgd 		 * Also necessary for connection based protocols to avoid
    644   1.1       cgd 		 * race conditions during a reconnect.
    645   1.1       cgd 		 */
    646  1.24      fvdl 		error = nfs_rcvlock(myrep);
    647  1.24      fvdl 		if (error)
    648  1.14   mycroft 			return (error);
    649   1.1       cgd 		/* Already received, bye bye */
    650   1.1       cgd 		if (myrep->r_mrep != NULL) {
    651  1.14   mycroft 			nfs_rcvunlock(&nmp->nm_flag);
    652   1.1       cgd 			return (0);
    653   1.1       cgd 		}
    654   1.1       cgd 		/*
    655   1.1       cgd 		 * Get the next Rpc reply off the socket
    656   1.1       cgd 		 */
    657  1.14   mycroft 		error = nfs_receive(myrep, &nam, &mrep);
    658  1.14   mycroft 		nfs_rcvunlock(&nmp->nm_flag);
    659  1.14   mycroft 		if (error) {
    660   1.1       cgd 
    661   1.1       cgd 			/*
    662   1.1       cgd 			 * Ignore routing errors on connectionless protocols??
    663   1.1       cgd 			 */
    664   1.1       cgd 			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
    665   1.1       cgd 				nmp->nm_so->so_error = 0;
    666  1.33      fvdl 				printf("nfs_reply: ignoring error %d\n", error);
    667  1.14   mycroft 				if (myrep->r_flags & R_GETONEREP)
    668  1.14   mycroft 					return (0);
    669   1.1       cgd 				continue;
    670   1.1       cgd 			}
    671   1.1       cgd 			return (error);
    672   1.1       cgd 		}
    673  1.14   mycroft 		if (nam)
    674  1.14   mycroft 			m_freem(nam);
    675   1.1       cgd 
    676   1.1       cgd 		/*
    677   1.1       cgd 		 * Get the xid and check that it is an rpc reply
    678   1.1       cgd 		 */
    679  1.14   mycroft 		md = mrep;
    680  1.14   mycroft 		dpos = mtod(md, caddr_t);
    681  1.22       cgd 		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
    682  1.14   mycroft 		rxid = *tl++;
    683  1.14   mycroft 		if (*tl != rpc_reply) {
    684  1.14   mycroft 			if (nmp->nm_flag & NFSMNT_NQNFS) {
    685  1.14   mycroft 				if (nqnfs_callback(nmp, mrep, md, dpos))
    686  1.14   mycroft 					nfsstats.rpcinvalid++;
    687  1.14   mycroft 			} else {
    688  1.14   mycroft 				nfsstats.rpcinvalid++;
    689  1.14   mycroft 				m_freem(mrep);
    690  1.14   mycroft 			}
    691  1.14   mycroft nfsmout:
    692  1.14   mycroft 			if (myrep->r_flags & R_GETONEREP)
    693  1.14   mycroft 				return (0);
    694   1.1       cgd 			continue;
    695   1.1       cgd 		}
    696  1.14   mycroft 
    697   1.1       cgd 		/*
    698   1.1       cgd 		 * Loop through the request list to match up the reply
    699   1.1       cgd 		 * Iff no match, just drop the datagram
    700   1.1       cgd 		 */
    701  1.16   mycroft 		for (rep = nfs_reqq.tqh_first; rep != 0;
    702  1.16   mycroft 		    rep = rep->r_chain.tqe_next) {
    703   1.1       cgd 			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
    704   1.1       cgd 				/* Found it.. */
    705  1.14   mycroft 				rep->r_mrep = mrep;
    706  1.14   mycroft 				rep->r_md = md;
    707  1.14   mycroft 				rep->r_dpos = dpos;
    708  1.14   mycroft 				if (nfsrtton) {
    709  1.14   mycroft 					struct rttl *rt;
    710  1.14   mycroft 
    711  1.14   mycroft 					rt = &nfsrtt.rttl[nfsrtt.pos];
    712  1.14   mycroft 					rt->proc = rep->r_procnum;
    713  1.14   mycroft 					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
    714  1.14   mycroft 					rt->sent = nmp->nm_sent;
    715  1.14   mycroft 					rt->cwnd = nmp->nm_cwnd;
    716  1.14   mycroft 					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
    717  1.14   mycroft 					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
    718  1.14   mycroft 					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
    719  1.14   mycroft 					rt->tstamp = time;
    720  1.14   mycroft 					if (rep->r_flags & R_TIMING)
    721  1.14   mycroft 						rt->rtt = rep->r_rtt;
    722  1.14   mycroft 					else
    723  1.14   mycroft 						rt->rtt = 1000000;
    724  1.14   mycroft 					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
    725  1.14   mycroft 				}
    726   1.1       cgd 				/*
    727  1.14   mycroft 				 * Update congestion window.
    728  1.14   mycroft 				 * Do the additive increase of
    729  1.14   mycroft 				 * one rpc/rtt.
    730  1.14   mycroft 				 */
    731  1.14   mycroft 				if (nmp->nm_cwnd <= nmp->nm_sent) {
    732  1.14   mycroft 					nmp->nm_cwnd +=
    733  1.14   mycroft 					   (NFS_CWNDSCALE * NFS_CWNDSCALE +
    734  1.14   mycroft 					   (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
    735  1.14   mycroft 					if (nmp->nm_cwnd > NFS_MAXCWND)
    736  1.14   mycroft 						nmp->nm_cwnd = NFS_MAXCWND;
    737  1.14   mycroft 				}
    738  1.14   mycroft 				rep->r_flags &= ~R_SENT;
    739  1.14   mycroft 				nmp->nm_sent -= NFS_CWNDSCALE;
    740  1.14   mycroft 				/*
    741  1.14   mycroft 				 * Update rtt using a gain of 0.125 on the mean
    742  1.14   mycroft 				 * and a gain of 0.25 on the deviation.
    743   1.1       cgd 				 */
    744   1.1       cgd 				if (rep->r_flags & R_TIMING) {
    745  1.14   mycroft 					/*
    746  1.14   mycroft 					 * Since the timer resolution of
    747  1.14   mycroft 					 * NFS_HZ is so course, it can often
    748  1.14   mycroft 					 * result in r_rtt == 0. Since
    749  1.14   mycroft 					 * r_rtt == N means that the actual
    750  1.14   mycroft 					 * rtt is between N+dt and N+2-dt ticks,
    751  1.14   mycroft 					 * add 1.
    752  1.14   mycroft 					 */
    753  1.14   mycroft 					t1 = rep->r_rtt + 1;
    754  1.14   mycroft 					t1 -= (NFS_SRTT(rep) >> 3);
    755  1.14   mycroft 					NFS_SRTT(rep) += t1;
    756  1.14   mycroft 					if (t1 < 0)
    757  1.14   mycroft 						t1 = -t1;
    758  1.14   mycroft 					t1 -= (NFS_SDRTT(rep) >> 2);
    759  1.14   mycroft 					NFS_SDRTT(rep) += t1;
    760   1.1       cgd 				}
    761  1.14   mycroft 				nmp->nm_timeouts = 0;
    762   1.1       cgd 				break;
    763   1.1       cgd 			}
    764   1.1       cgd 		}
    765   1.1       cgd 		/*
    766   1.1       cgd 		 * If not matched to a request, drop it.
    767   1.1       cgd 		 * If it's mine, get out.
    768   1.1       cgd 		 */
    769  1.16   mycroft 		if (rep == 0) {
    770   1.1       cgd 			nfsstats.rpcunexpected++;
    771  1.14   mycroft 			m_freem(mrep);
    772  1.14   mycroft 		} else if (rep == myrep) {
    773  1.14   mycroft 			if (rep->r_mrep == NULL)
    774  1.14   mycroft 				panic("nfsreply nil");
    775  1.14   mycroft 			return (0);
    776  1.14   mycroft 		}
    777  1.14   mycroft 		if (myrep->r_flags & R_GETONEREP)
    778   1.1       cgd 			return (0);
    779   1.1       cgd 	}
    780   1.1       cgd }
    781   1.1       cgd 
    782   1.1       cgd /*
    783   1.1       cgd  * nfs_request - goes something like this
    784   1.1       cgd  *	- fill in request struct
    785   1.1       cgd  *	- links it into list
    786   1.1       cgd  *	- calls nfs_send() for first transmit
    787   1.1       cgd  *	- calls nfs_receive() to get reply
    788   1.1       cgd  *	- break down rpc header and return with nfs reply pointed to
    789   1.1       cgd  *	  by mrep or error
    790   1.1       cgd  * nb: always frees up mreq mbuf list
    791   1.1       cgd  */
    792  1.23  christos int
    793  1.14   mycroft nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
    794   1.1       cgd 	struct vnode *vp;
    795  1.14   mycroft 	struct mbuf *mrest;
    796   1.1       cgd 	int procnum;
    797   1.1       cgd 	struct proc *procp;
    798  1.14   mycroft 	struct ucred *cred;
    799   1.1       cgd 	struct mbuf **mrp;
    800   1.1       cgd 	struct mbuf **mdp;
    801   1.1       cgd 	caddr_t *dposp;
    802   1.1       cgd {
    803   1.1       cgd 	register struct mbuf *m, *mrep;
    804   1.1       cgd 	register struct nfsreq *rep;
    805  1.22       cgd 	register u_int32_t *tl;
    806  1.14   mycroft 	register int i;
    807   1.1       cgd 	struct nfsmount *nmp;
    808  1.14   mycroft 	struct mbuf *md, *mheadend;
    809  1.14   mycroft 	struct nfsnode *np;
    810  1.24      fvdl 	char nickv[RPCX_NICKVERF];
    811  1.14   mycroft 	time_t reqtime, waituntil;
    812  1.14   mycroft 	caddr_t dpos, cp2;
    813  1.14   mycroft 	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
    814  1.14   mycroft 	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
    815  1.24      fvdl 	int verf_len, verf_type;
    816  1.22       cgd 	u_int32_t xid;
    817  1.14   mycroft 	u_quad_t frev;
    818  1.24      fvdl 	char *auth_str, *verf_str;
    819  1.24      fvdl 	NFSKERBKEY_T key;		/* save session key */
    820   1.1       cgd 
    821  1.14   mycroft 	nmp = VFSTONFS(vp->v_mount);
    822   1.1       cgd 	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
    823   1.1       cgd 	rep->r_nmp = nmp;
    824   1.1       cgd 	rep->r_vp = vp;
    825   1.1       cgd 	rep->r_procp = procp;
    826  1.14   mycroft 	rep->r_procnum = procnum;
    827  1.14   mycroft 	i = 0;
    828  1.14   mycroft 	m = mrest;
    829   1.1       cgd 	while (m) {
    830  1.14   mycroft 		i += m->m_len;
    831   1.1       cgd 		m = m->m_next;
    832   1.1       cgd 	}
    833  1.14   mycroft 	mrest_len = i;
    834  1.14   mycroft 
    835  1.14   mycroft 	/*
    836  1.14   mycroft 	 * Get the RPC header with authorization.
    837  1.14   mycroft 	 */
    838  1.14   mycroft kerbauth:
    839  1.24      fvdl 	verf_str = auth_str = (char *)0;
    840  1.14   mycroft 	if (nmp->nm_flag & NFSMNT_KERB) {
    841  1.24      fvdl 		verf_str = nickv;
    842  1.24      fvdl 		verf_len = sizeof (nickv);
    843  1.24      fvdl 		auth_type = RPCAUTH_KERB4;
    844  1.24      fvdl 		bzero((caddr_t)key, sizeof (key));
    845  1.24      fvdl 		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
    846  1.24      fvdl 			&auth_len, verf_str, verf_len)) {
    847  1.24      fvdl 			error = nfs_getauth(nmp, rep, cred, &auth_str,
    848  1.24      fvdl 				&auth_len, verf_str, &verf_len, key);
    849  1.14   mycroft 			if (error) {
    850  1.14   mycroft 				free((caddr_t)rep, M_NFSREQ);
    851  1.14   mycroft 				m_freem(mrest);
    852  1.14   mycroft 				return (error);
    853  1.14   mycroft 			}
    854   1.1       cgd 		}
    855  1.14   mycroft 	} else {
    856  1.14   mycroft 		auth_type = RPCAUTH_UNIX;
    857  1.20   mycroft 		auth_len = (((cred->cr_ngroups > nmp->nm_numgrps) ?
    858  1.20   mycroft 			nmp->nm_numgrps : cred->cr_ngroups) << 2) +
    859  1.14   mycroft 			5 * NFSX_UNSIGNED;
    860  1.14   mycroft 	}
    861  1.24      fvdl 	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
    862  1.24      fvdl 	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
    863  1.14   mycroft 	if (auth_str)
    864  1.14   mycroft 		free(auth_str, M_TEMP);
    865  1.14   mycroft 
    866   1.1       cgd 	/*
    867  1.14   mycroft 	 * For stream protocols, insert a Sun RPC Record Mark.
    868   1.1       cgd 	 */
    869  1.14   mycroft 	if (nmp->nm_sotype == SOCK_STREAM) {
    870  1.14   mycroft 		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
    871  1.22       cgd 		*mtod(m, u_int32_t *) = htonl(0x80000000 |
    872  1.14   mycroft 			 (m->m_pkthdr.len - NFSX_UNSIGNED));
    873   1.1       cgd 	}
    874  1.14   mycroft 	rep->r_mreq = m;
    875  1.14   mycroft 	rep->r_xid = xid;
    876  1.14   mycroft tryagain:
    877  1.14   mycroft 	if (nmp->nm_flag & NFSMNT_SOFT)
    878  1.14   mycroft 		rep->r_retry = nmp->nm_retry;
    879  1.14   mycroft 	else
    880  1.14   mycroft 		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
    881  1.14   mycroft 	rep->r_rtt = rep->r_rexmit = 0;
    882  1.14   mycroft 	if (proct[procnum] > 0)
    883  1.14   mycroft 		rep->r_flags = R_TIMING;
    884  1.14   mycroft 	else
    885  1.14   mycroft 		rep->r_flags = 0;
    886  1.14   mycroft 	rep->r_mrep = NULL;
    887   1.1       cgd 
    888   1.1       cgd 	/*
    889   1.1       cgd 	 * Do the client side RPC.
    890   1.1       cgd 	 */
    891   1.1       cgd 	nfsstats.rpcrequests++;
    892   1.1       cgd 	/*
    893   1.1       cgd 	 * Chain request into list of outstanding requests. Be sure
    894   1.1       cgd 	 * to put it LAST so timer finds oldest requests first.
    895   1.1       cgd 	 */
    896  1.14   mycroft 	s = splsoftclock();
    897  1.16   mycroft 	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
    898  1.14   mycroft 
    899  1.14   mycroft 	/* Get send time for nqnfs */
    900  1.14   mycroft 	reqtime = time.tv_sec;
    901  1.14   mycroft 
    902   1.1       cgd 	/*
    903   1.1       cgd 	 * If backing off another request or avoiding congestion, don't
    904   1.1       cgd 	 * send this one now but let timer do it. If not timing a request,
    905   1.1       cgd 	 * do it now.
    906   1.1       cgd 	 */
    907  1.14   mycroft 	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
    908  1.14   mycroft 		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
    909  1.14   mycroft 		nmp->nm_sent < nmp->nm_cwnd)) {
    910   1.1       cgd 		splx(s);
    911   1.1       cgd 		if (nmp->nm_soflags & PR_CONNREQUIRED)
    912  1.14   mycroft 			error = nfs_sndlock(&nmp->nm_flag, rep);
    913  1.14   mycroft 		if (!error) {
    914  1.14   mycroft 			m = m_copym(m, 0, M_COPYALL, M_WAIT);
    915  1.14   mycroft 			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
    916  1.14   mycroft 			if (nmp->nm_soflags & PR_CONNREQUIRED)
    917  1.14   mycroft 				nfs_sndunlock(&nmp->nm_flag);
    918  1.14   mycroft 		}
    919  1.14   mycroft 		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
    920  1.14   mycroft 			nmp->nm_sent += NFS_CWNDSCALE;
    921  1.14   mycroft 			rep->r_flags |= R_SENT;
    922  1.14   mycroft 		}
    923  1.14   mycroft 	} else {
    924   1.1       cgd 		splx(s);
    925  1.14   mycroft 		rep->r_rtt = -1;
    926  1.14   mycroft 	}
    927   1.1       cgd 
    928   1.1       cgd 	/*
    929   1.1       cgd 	 * Wait for the reply from our send or the timer's.
    930   1.1       cgd 	 */
    931  1.14   mycroft 	if (!error || error == EPIPE)
    932  1.14   mycroft 		error = nfs_reply(rep);
    933   1.1       cgd 
    934   1.1       cgd 	/*
    935   1.1       cgd 	 * RPC done, unlink the request.
    936   1.1       cgd 	 */
    937  1.14   mycroft 	s = splsoftclock();
    938  1.16   mycroft 	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
    939   1.1       cgd 	splx(s);
    940   1.1       cgd 
    941   1.1       cgd 	/*
    942  1.14   mycroft 	 * Decrement the outstanding request count.
    943  1.14   mycroft 	 */
    944  1.14   mycroft 	if (rep->r_flags & R_SENT) {
    945  1.14   mycroft 		rep->r_flags &= ~R_SENT;	/* paranoia */
    946  1.14   mycroft 		nmp->nm_sent -= NFS_CWNDSCALE;
    947  1.14   mycroft 	}
    948  1.14   mycroft 
    949  1.14   mycroft 	/*
    950   1.1       cgd 	 * If there was a successful reply and a tprintf msg.
    951   1.1       cgd 	 * tprintf a response.
    952   1.1       cgd 	 */
    953   1.1       cgd 	if (!error && (rep->r_flags & R_TPRINTFMSG))
    954   1.1       cgd 		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
    955   1.1       cgd 		    "is alive again");
    956   1.1       cgd 	mrep = rep->r_mrep;
    957  1.14   mycroft 	md = rep->r_md;
    958  1.14   mycroft 	dpos = rep->r_dpos;
    959  1.14   mycroft 	if (error) {
    960  1.14   mycroft 		m_freem(rep->r_mreq);
    961  1.14   mycroft 		free((caddr_t)rep, M_NFSREQ);
    962   1.1       cgd 		return (error);
    963  1.14   mycroft 	}
    964   1.1       cgd 
    965   1.1       cgd 	/*
    966   1.1       cgd 	 * break down the rpc header and check if ok
    967   1.1       cgd 	 */
    968  1.24      fvdl 	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
    969   1.1       cgd 	if (*tl++ == rpc_msgdenied) {
    970   1.1       cgd 		if (*tl == rpc_mismatch)
    971   1.1       cgd 			error = EOPNOTSUPP;
    972  1.14   mycroft 		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
    973  1.24      fvdl 			if (!failed_auth) {
    974  1.14   mycroft 				failed_auth++;
    975  1.14   mycroft 				mheadend->m_next = (struct mbuf *)0;
    976  1.14   mycroft 				m_freem(mrep);
    977  1.14   mycroft 				m_freem(rep->r_mreq);
    978  1.14   mycroft 				goto kerbauth;
    979  1.14   mycroft 			} else
    980  1.14   mycroft 				error = EAUTH;
    981  1.14   mycroft 		} else
    982   1.1       cgd 			error = EACCES;
    983   1.1       cgd 		m_freem(mrep);
    984  1.14   mycroft 		m_freem(rep->r_mreq);
    985  1.14   mycroft 		free((caddr_t)rep, M_NFSREQ);
    986   1.1       cgd 		return (error);
    987   1.1       cgd 	}
    988  1.14   mycroft 
    989   1.1       cgd 	/*
    990  1.24      fvdl 	 * Grab any Kerberos verifier, otherwise just throw it away.
    991   1.1       cgd 	 */
    992  1.24      fvdl 	verf_type = fxdr_unsigned(int, *tl++);
    993  1.24      fvdl 	i = fxdr_unsigned(int32_t, *tl);
    994  1.24      fvdl 	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
    995  1.24      fvdl 		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
    996  1.24      fvdl 		if (error)
    997  1.24      fvdl 			goto nfsmout;
    998  1.24      fvdl 	} else if (i > 0)
    999  1.24      fvdl 		nfsm_adv(nfsm_rndup(i));
   1000  1.22       cgd 	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
   1001   1.1       cgd 	/* 0 == ok */
   1002   1.1       cgd 	if (*tl == 0) {
   1003  1.22       cgd 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
   1004   1.1       cgd 		if (*tl != 0) {
   1005   1.1       cgd 			error = fxdr_unsigned(int, *tl);
   1006  1.24      fvdl 			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
   1007  1.24      fvdl 				error == NFSERR_TRYLATER) {
   1008  1.24      fvdl 				m_freem(mrep);
   1009  1.14   mycroft 				error = 0;
   1010  1.14   mycroft 				waituntil = time.tv_sec + trylater_delay;
   1011  1.14   mycroft 				while (time.tv_sec < waituntil)
   1012  1.14   mycroft 					(void) tsleep((caddr_t)&lbolt,
   1013  1.14   mycroft 						PSOCK, "nqnfstry", 0);
   1014  1.14   mycroft 				trylater_delay *= nfs_backoff[trylater_cnt];
   1015  1.14   mycroft 				if (trylater_cnt < 7)
   1016  1.14   mycroft 					trylater_cnt++;
   1017  1.14   mycroft 				goto tryagain;
   1018  1.14   mycroft 			}
   1019  1.14   mycroft 
   1020  1.14   mycroft 			/*
   1021  1.14   mycroft 			 * If the File Handle was stale, invalidate the
   1022  1.14   mycroft 			 * lookup cache, just in case.
   1023  1.14   mycroft 			 */
   1024  1.14   mycroft 			if (error == ESTALE)
   1025  1.14   mycroft 				cache_purge(vp);
   1026  1.24      fvdl 			if (nmp->nm_flag & NFSMNT_NFSV3) {
   1027  1.24      fvdl 				*mrp = mrep;
   1028  1.24      fvdl 				*mdp = md;
   1029  1.24      fvdl 				*dposp = dpos;
   1030  1.24      fvdl 				error |= NFSERR_RETERR;
   1031  1.24      fvdl 			} else
   1032  1.24      fvdl 				m_freem(mrep);
   1033  1.14   mycroft 			m_freem(rep->r_mreq);
   1034  1.14   mycroft 			free((caddr_t)rep, M_NFSREQ);
   1035   1.1       cgd 			return (error);
   1036   1.1       cgd 		}
   1037  1.14   mycroft 
   1038  1.14   mycroft 		/*
   1039  1.14   mycroft 		 * For nqnfs, get any lease in reply
   1040  1.14   mycroft 		 */
   1041  1.14   mycroft 		if (nmp->nm_flag & NFSMNT_NQNFS) {
   1042  1.22       cgd 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
   1043  1.14   mycroft 			if (*tl) {
   1044  1.14   mycroft 				np = VTONFS(vp);
   1045  1.14   mycroft 				nqlflag = fxdr_unsigned(int, *tl);
   1046  1.22       cgd 				nfsm_dissect(tl, u_int32_t *, 4*NFSX_UNSIGNED);
   1047  1.14   mycroft 				cachable = fxdr_unsigned(int, *tl++);
   1048  1.14   mycroft 				reqtime += fxdr_unsigned(int, *tl++);
   1049  1.14   mycroft 				if (reqtime > time.tv_sec) {
   1050  1.14   mycroft 				    fxdr_hyper(tl, &frev);
   1051  1.14   mycroft 				    nqnfs_clientlease(nmp, np, nqlflag,
   1052  1.14   mycroft 					cachable, reqtime, frev);
   1053  1.14   mycroft 				}
   1054  1.14   mycroft 			}
   1055  1.14   mycroft 		}
   1056   1.1       cgd 		*mrp = mrep;
   1057   1.1       cgd 		*mdp = md;
   1058   1.1       cgd 		*dposp = dpos;
   1059  1.14   mycroft 		m_freem(rep->r_mreq);
   1060  1.14   mycroft 		FREE((caddr_t)rep, M_NFSREQ);
   1061   1.1       cgd 		return (0);
   1062   1.1       cgd 	}
   1063   1.1       cgd 	m_freem(mrep);
   1064  1.24      fvdl 	error = EPROTONOSUPPORT;
   1065  1.24      fvdl nfsmout:
   1066  1.14   mycroft 	m_freem(rep->r_mreq);
   1067  1.14   mycroft 	free((caddr_t)rep, M_NFSREQ);
   1068   1.1       cgd 	return (error);
   1069   1.1       cgd }
   1070  1.32   thorpej #endif /* NFS */
   1071   1.1       cgd 
   1072   1.1       cgd /*
   1073   1.1       cgd  * Generate the rpc reply header
   1074   1.1       cgd  * siz arg. is used to decide if adding a cluster is worthwhile
                         *
                         * Arguments:
                         *	siz	expected size of the reply body
                         *	nd	request descriptor; supplies nd_retxid, nd_flag, nd_cr,
                         *		nd_nam2 and nd_duration
                         *	slp	server socket, used to look up the Kerberos nickname entry
                         *	err	status to report: ERPCMISMATCH or a value with
                         *		NFSERR_AUTHERR set produces a "denied" reply, anything
                         *		else an "accepted" reply carrying a status word
                         *	cache, frev
                         *		lease values piggybacked on nqnfs replies
                         *	mrq, mbp, bposp
                         *		out: head of the reply chain, the current mbuf, and the
                         *		position at which the caller continues building
                         *
                         * Always returns 0.  Increments nfsstats.srvrpc_errs when err is
                         * neither 0 nor NFSERR_RETVOID.
   1075   1.1       cgd  */
   1076  1.23  christos int
   1077  1.24      fvdl nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
   1078   1.1       cgd 	int siz;
   1079  1.24      fvdl 	struct nfsrv_descript *nd;
   1080  1.24      fvdl 	struct nfssvc_sock *slp;
   1081   1.1       cgd 	int err;
   1082  1.14   mycroft 	int cache;
   1083  1.14   mycroft 	u_quad_t *frev;
   1084   1.1       cgd 	struct mbuf **mrq;
   1085   1.1       cgd 	struct mbuf **mbp;
   1086   1.1       cgd 	caddr_t *bposp;
   1087   1.1       cgd {
   1088  1.22       cgd 	register u_int32_t *tl;
   1089  1.14   mycroft 	register struct mbuf *mreq;
   1090   1.1       cgd 	caddr_t bpos;
   1091  1.14   mycroft 	struct mbuf *mb, *mb2;
   1092   1.1       cgd 
   1093  1.14   mycroft 	MGETHDR(mreq, M_WAIT, MT_DATA);
   1094   1.1       cgd 	mb = mreq;
   1095  1.14   mycroft 	/*
   1096  1.14   mycroft 	 * If this is a big reply, use a cluster else
   1097  1.14   mycroft 	 * try and leave leading space for the lower level headers.
   1098  1.14   mycroft 	 */
   1099  1.14   mycroft 	siz += RPC_REPLYSIZ;
   1100  1.14   mycroft 	if (siz >= MINCLSIZE) {
   1101   1.1       cgd 		MCLGET(mreq, M_WAIT);
   1102  1.14   mycroft 	} else
   1103  1.14   mycroft 		mreq->m_data += max_hdr;
   1104  1.22       cgd 	tl = mtod(mreq, u_int32_t *);
                        	/*
                        	 * Reserve six words up front.  Every branch below writes exactly
                        	 * six, except the auth-error reply, which writes five and gives
                        	 * one back.
                        	 */
   1105  1.24      fvdl 	mreq->m_len = 6 * NFSX_UNSIGNED;
   1106  1.24      fvdl 	bpos = ((caddr_t)tl) + mreq->m_len;
   1107  1.14   mycroft 	*tl++ = txdr_unsigned(nd->nd_retxid);
   1108   1.1       cgd 	*tl++ = rpc_reply;
   1109  1.24      fvdl 	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
   1110   1.1       cgd 		*tl++ = rpc_msgdenied;
   1111  1.24      fvdl 		if (err & NFSERR_AUTHERR) {
   1112  1.14   mycroft 			*tl++ = rpc_autherr;
   1113  1.24      fvdl 			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
                        			/* only five words used: trim the sixth */
   1114  1.14   mycroft 			mreq->m_len -= NFSX_UNSIGNED;
   1115  1.14   mycroft 			bpos -= NFSX_UNSIGNED;
   1116  1.14   mycroft 		} else {
   1117  1.14   mycroft 			*tl++ = rpc_mismatch;
                        			/* two version words, both RPC_VER2 */
   1118  1.24      fvdl 			*tl++ = txdr_unsigned(RPC_VER2);
   1119  1.24      fvdl 			*tl = txdr_unsigned(RPC_VER2);
   1120  1.14   mycroft 		}
   1121   1.1       cgd 	} else {
   1122   1.1       cgd 		*tl++ = rpc_msgaccepted;
   1123  1.24      fvdl 
   1124  1.24      fvdl 		/*
   1125  1.24      fvdl 		 * For Kerberos authentication, we must send the nickname
   1126  1.24      fvdl 		 * verifier back, otherwise just RPCAUTH_NULL.
   1127  1.24      fvdl 		 */
   1128  1.24      fvdl 		if (nd->nd_flag & ND_KERBFULL) {
   1129  1.24      fvdl 		    register struct nfsuid *nuidp;
   1130  1.24      fvdl 		    struct timeval ktvin, ktvout;
   1131  1.24      fvdl 
                        		    /*
                        		     * Find the nickname entry for this uid; when nd_nam2
                        		     * is set the entry's address must match it as well.
                        		     */
   1132  1.24      fvdl 		    for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
   1133  1.24      fvdl 			nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
   1134  1.24      fvdl 			if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
   1135  1.24      fvdl 			    (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
   1136  1.24      fvdl 			     &nuidp->nu_haddr, nd->nd_nam2)))
   1137  1.24      fvdl 			    break;
   1138  1.24      fvdl 		    }
   1139  1.24      fvdl 		    if (nuidp) {
   1140  1.24      fvdl 			ktvin.tv_sec =
   1141  1.24      fvdl 			    txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
   1142  1.24      fvdl 			ktvin.tv_usec =
   1143  1.24      fvdl 			    txdr_unsigned(nuidp->nu_timestamp.tv_usec);
   1144  1.24      fvdl 
   1145  1.24      fvdl 			/*
   1146  1.24      fvdl 			 * Encrypt the timestamp in ecb mode using the
   1147  1.24      fvdl 			 * session key.
                        			 *
                        			 * NOTE(review): the encryption step is only the
                        			 * placeholder below, so nothing in this function
                        			 * ever assigns ktvout, yet its fields are copied
                        			 * into the reply further down.  Confirm that the
                        			 * NFSKERB code is meant to fill in ktvout from
                        			 * ktvin before this path can be used.
   1148  1.24      fvdl 			 */
   1149  1.24      fvdl #ifdef NFSKERB
   1150  1.24      fvdl 			XXX
   1151  1.24      fvdl #endif
   1152  1.24      fvdl 
   1153  1.24      fvdl 			*tl++ = rpc_auth_kerb;
   1154  1.24      fvdl 			*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
   1155  1.24      fvdl 			*tl = ktvout.tv_sec;
                        			/*
                        			 * The six reserved words are now used up; get three
                        			 * more for tv_usec, the uid and the status word
                        			 * that the switch below stores through tl.
                        			 */
   1156  1.24      fvdl 			nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
   1157  1.24      fvdl 			*tl++ = ktvout.tv_usec;
   1158  1.24      fvdl 			*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
   1159  1.24      fvdl 		    } else {
                        			/* no nickname entry: two zero words for the verifier */
   1160  1.24      fvdl 			*tl++ = 0;
   1161  1.24      fvdl 			*tl++ = 0;
   1162  1.24      fvdl 		    }
   1163  1.24      fvdl 		} else {
   1164  1.24      fvdl 			*tl++ = 0;
   1165  1.24      fvdl 			*tl++ = 0;
   1166  1.24      fvdl 		}
                        		/* tl now points at the status word. */
   1167   1.1       cgd 		switch (err) {
   1168   1.1       cgd 		case EPROGUNAVAIL:
   1169   1.1       cgd 			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
   1170   1.1       cgd 			break;
   1171   1.1       cgd 		case EPROGMISMATCH:
   1172   1.1       cgd 			*tl = txdr_unsigned(RPC_PROGMISMATCH);
                        			/*
                        			 * Followed by two version words: 3 and 3 for nqnfs
                        			 * requests, 2 and 3 otherwise.
                        			 */
   1173  1.24      fvdl 			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
   1174  1.24      fvdl 			if (nd->nd_flag & ND_NQNFS) {
   1175  1.24      fvdl 				*tl++ = txdr_unsigned(3);
   1176  1.24      fvdl 				*tl = txdr_unsigned(3);
   1177  1.24      fvdl 			} else {
   1178  1.24      fvdl 				*tl++ = txdr_unsigned(2);
   1179  1.24      fvdl 				*tl = txdr_unsigned(3);
   1180  1.24      fvdl 			}
   1181   1.1       cgd 			break;
   1182   1.1       cgd 		case EPROCUNAVAIL:
   1183   1.1       cgd 			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
   1184   1.1       cgd 			break;
   1185  1.24      fvdl 		case EBADRPC:
   1186  1.24      fvdl 			*tl = txdr_unsigned(RPC_GARBAGE);
   1187  1.24      fvdl 			break;
   1188   1.1       cgd 		default:
                        			/*
                        			 * Status word 0, then (unless err is NFSERR_RETVOID)
                        			 * one more word: nfsrv_errmap(nd, err) for a non-zero
                        			 * err, or 0.
                        			 */
   1189   1.1       cgd 			*tl = 0;
   1190  1.24      fvdl 			if (err != NFSERR_RETVOID) {
   1191  1.22       cgd 				nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
   1192  1.14   mycroft 				if (err)
   1193  1.24      fvdl 				    *tl = txdr_unsigned(nfsrv_errmap(nd, err));
   1194  1.14   mycroft 				else
   1195  1.24      fvdl 				    *tl = 0;
   1196   1.1       cgd 			}
   1197   1.1       cgd 			break;
   1198   1.1       cgd 		};
   1199   1.1       cgd 	}
   1200  1.14   mycroft 
   1201  1.14   mycroft 	/*
   1202  1.14   mycroft 	 * For nqnfs, piggyback lease as requested.
                        	 * With ND_LEASE set this adds five words: the lease flag bits,
                        	 * cache, nd_duration and the two-word frev.  Without it, a single
                        	 * zero word is added.
   1203  1.14   mycroft 	 */
   1204  1.24      fvdl 	if ((nd->nd_flag & ND_NQNFS) && err == 0) {
   1205  1.24      fvdl 		if (nd->nd_flag & ND_LEASE) {
   1206  1.24      fvdl 			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
   1207  1.24      fvdl 			*tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
   1208  1.14   mycroft 			*tl++ = txdr_unsigned(cache);
   1209  1.14   mycroft 			*tl++ = txdr_unsigned(nd->nd_duration);
   1210  1.14   mycroft 			txdr_hyper(frev, tl);
   1211  1.14   mycroft 		} else {
   1212  1.22       cgd 			nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
   1213  1.14   mycroft 			*tl = 0;
   1214  1.14   mycroft 		}
   1215  1.14   mycroft 	}
   1216   1.1       cgd 	*mrq = mreq;
   1217   1.1       cgd 	*mbp = mb;
   1218   1.1       cgd 	*bposp = bpos;
   1219  1.24      fvdl 	if (err != 0 && err != NFSERR_RETVOID)
   1220   1.1       cgd 		nfsstats.srvrpc_errs++;
   1221   1.1       cgd 	return (0);
   1222   1.1       cgd }
   1223   1.1       cgd 
   1224   1.1       cgd /*
   1225   1.1       cgd  * Nfs timer routine
   1226   1.1       cgd  * Scan the nfsreq list and retransmit any requests that have timed out
   1227   1.1       cgd  * To avoid retransmission attempts on STREAM sockets (in the future) make
   1228   1.1       cgd  * sure to set the r_retry field to 0 (implies nm_retry == 0).
   1229   1.1       cgd  */
   1230   1.7   mycroft void
   1231  1.14   mycroft nfs_timer(arg)
   1232  1.24      fvdl 	void *arg;	/* never used */
   1233   1.1       cgd {
   1234   1.1       cgd 	register struct nfsreq *rep;
   1235   1.1       cgd 	register struct mbuf *m;
   1236   1.1       cgd 	register struct socket *so;
   1237   1.1       cgd 	register struct nfsmount *nmp;
   1238  1.14   mycroft 	register int timeo;
   1239  1.27   thorpej 	int s, error;
   1240  1.27   thorpej #ifdef NFSSERVER
   1241  1.24      fvdl 	register struct nfssvc_sock *slp;
   1242  1.14   mycroft 	static long lasttime = 0;
   1243  1.27   thorpej 	u_quad_t cur_usec;
   1244  1.23  christos #endif
   1245   1.1       cgd 
   1246  1.21   mycroft 	s = splsoftnet();
   1247  1.16   mycroft 	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
   1248   1.1       cgd 		nmp = rep->r_nmp;
   1249  1.14   mycroft 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
   1250   1.1       cgd 			continue;
   1251  1.14   mycroft 		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
   1252   1.1       cgd 			rep->r_flags |= R_SOFTTERM;
   1253   1.1       cgd 			continue;
   1254   1.1       cgd 		}
   1255  1.14   mycroft 		if (rep->r_rtt >= 0) {
   1256  1.14   mycroft 			rep->r_rtt++;
   1257  1.14   mycroft 			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
   1258  1.14   mycroft 				timeo = nmp->nm_timeo;
   1259  1.14   mycroft 			else
   1260  1.14   mycroft 				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
   1261  1.14   mycroft 			if (nmp->nm_timeouts > 0)
   1262  1.14   mycroft 				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
   1263  1.14   mycroft 			if (rep->r_rtt <= timeo)
   1264  1.14   mycroft 				continue;
   1265  1.14   mycroft 			if (nmp->nm_timeouts < 8)
   1266  1.14   mycroft 				nmp->nm_timeouts++;
   1267   1.1       cgd 		}
   1268   1.1       cgd 		/*
   1269   1.1       cgd 		 * Check for server not responding
   1270   1.1       cgd 		 */
   1271   1.1       cgd 		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
   1272  1.14   mycroft 		     rep->r_rexmit > nmp->nm_deadthresh) {
   1273   1.1       cgd 			nfs_msg(rep->r_procp,
   1274   1.1       cgd 			    nmp->nm_mountp->mnt_stat.f_mntfromname,
   1275   1.1       cgd 			    "not responding");
   1276   1.1       cgd 			rep->r_flags |= R_TPRINTFMSG;
   1277   1.1       cgd 		}
   1278   1.1       cgd 		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
   1279   1.1       cgd 			nfsstats.rpctimeouts++;
   1280   1.1       cgd 			rep->r_flags |= R_SOFTTERM;
   1281   1.1       cgd 			continue;
   1282   1.1       cgd 		}
   1283  1.14   mycroft 		if (nmp->nm_sotype != SOCK_DGRAM) {
   1284  1.14   mycroft 			if (++rep->r_rexmit > NFS_MAXREXMIT)
   1285  1.14   mycroft 				rep->r_rexmit = NFS_MAXREXMIT;
   1286  1.14   mycroft 			continue;
   1287  1.14   mycroft 		}
   1288  1.14   mycroft 		if ((so = nmp->nm_so) == NULL)
   1289   1.1       cgd 			continue;
   1290   1.1       cgd 
   1291   1.1       cgd 		/*
   1292   1.1       cgd 		 * If there is enough space and the window allows..
   1293   1.1       cgd 		 *	Resend it
   1294  1.14   mycroft 		 * Set r_rtt to -1 in case we fail to send it now.
   1295   1.1       cgd 		 */
   1296  1.14   mycroft 		rep->r_rtt = -1;
   1297   1.1       cgd 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
   1298  1.14   mycroft 		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
   1299  1.14   mycroft 		    (rep->r_flags & R_SENT) ||
   1300  1.14   mycroft 		    nmp->nm_sent < nmp->nm_cwnd) &&
   1301  1.14   mycroft 		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
   1302   1.1       cgd 			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
   1303   1.1       cgd 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1304  1.28   mycroft 			    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
   1305   1.1       cgd 			else
   1306   1.1       cgd 			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
   1307  1.28   mycroft 			    nmp->nm_nam, (struct mbuf *)0, (struct proc *)0);
   1308   1.1       cgd 			if (error) {
   1309  1.33      fvdl 				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
   1310  1.33      fvdl 					printf("nfs_timer: ignoring error %d\n",
   1311  1.33      fvdl 						error);
   1312   1.1       cgd 					so->so_error = 0;
   1313  1.33      fvdl 				}
   1314   1.1       cgd 			} else {
   1315   1.1       cgd 				/*
   1316  1.14   mycroft 				 * Iff first send, start timing
   1317  1.14   mycroft 				 * else turn timing off, backoff timer
   1318  1.14   mycroft 				 * and divide congestion window by 2.
   1319   1.1       cgd 				 */
   1320  1.14   mycroft 				if (rep->r_flags & R_SENT) {
   1321  1.14   mycroft 					rep->r_flags &= ~R_TIMING;
   1322  1.14   mycroft 					if (++rep->r_rexmit > NFS_MAXREXMIT)
   1323  1.14   mycroft 						rep->r_rexmit = NFS_MAXREXMIT;
   1324  1.14   mycroft 					nmp->nm_cwnd >>= 1;
   1325  1.14   mycroft 					if (nmp->nm_cwnd < NFS_CWNDSCALE)
   1326  1.14   mycroft 						nmp->nm_cwnd = NFS_CWNDSCALE;
   1327  1.14   mycroft 					nfsstats.rpcretries++;
   1328  1.14   mycroft 				} else {
   1329  1.14   mycroft 					rep->r_flags |= R_SENT;
   1330  1.14   mycroft 					nmp->nm_sent += NFS_CWNDSCALE;
   1331  1.14   mycroft 				}
   1332  1.14   mycroft 				rep->r_rtt = 0;
   1333   1.1       cgd 			}
   1334   1.1       cgd 		}
   1335   1.1       cgd 	}
   1336  1.14   mycroft 
   1337  1.14   mycroft #ifdef NFSSERVER
   1338  1.14   mycroft 	/*
   1339  1.14   mycroft 	 * Call the nqnfs server timer once a second to handle leases.
   1340  1.14   mycroft 	 */
   1341  1.14   mycroft 	if (lasttime != time.tv_sec) {
   1342  1.14   mycroft 		lasttime = time.tv_sec;
   1343  1.14   mycroft 		nqnfs_serverd();
   1344  1.14   mycroft 	}
   1345  1.24      fvdl 
   1346  1.24      fvdl 	/*
   1347  1.24      fvdl 	 * Scan the write gathering queues for writes that need to be
   1348  1.24      fvdl 	 * completed now.
   1349  1.24      fvdl 	 */
   1350  1.24      fvdl 	cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
   1351  1.24      fvdl 	for (slp = nfssvc_sockhead.tqh_first; slp != 0;
   1352  1.24      fvdl 	    slp = slp->ns_chain.tqe_next) {
   1353  1.24      fvdl 	    if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
   1354  1.24      fvdl 		nfsrv_wakenfsd(slp);
   1355  1.24      fvdl 	}
   1356  1.14   mycroft #endif /* NFSSERVER */
   1357   1.1       cgd 	splx(s);
   1358  1.24      fvdl 	timeout(nfs_timer, (void *)0, nfs_ticks);
   1359   1.1       cgd }
   1360   1.1       cgd 
   1361   1.1       cgd /*
   1362  1.14   mycroft  * Test for a termination condition pending on the process.
   1363  1.14   mycroft  * This is used for NFSMNT_INT mounts.
                         *
                         * Returns EINTR when the request is already marked R_SOFTTERM, or when
                         * the mount has NFSMNT_INT set and process p has a pending signal from
                         * NFSINT_SIGMASK that is neither masked nor ignored; returns 0 otherwise.
                         * rep and p may each be null; nmp is dereferenced whenever rep does not
                         * short-circuit the test.
   1364   1.1       cgd  */
   1365  1.23  christos int
   1366  1.14   mycroft nfs_sigintr(nmp, rep, p)
   1367  1.14   mycroft 	struct nfsmount *nmp;
   1368  1.14   mycroft 	struct nfsreq *rep;
   1369  1.14   mycroft 	register struct proc *p;
   1370  1.14   mycroft {
   1371  1.14   mycroft 
                        	/* A soft-terminated request counts as interrupted on any mount. */
   1372  1.14   mycroft 	if (rep && (rep->r_flags & R_SOFTTERM))
   1373  1.14   mycroft 		return (EINTR);
   1374  1.14   mycroft 	if (!(nmp->nm_flag & NFSMNT_INT))
   1375  1.14   mycroft 		return (0);
                        	/* pending, minus blocked, minus ignored, restricted to NFSINT_SIGMASK */
   1376  1.14   mycroft 	if (p && p->p_siglist &&
   1377  1.14   mycroft 	    (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
   1378  1.14   mycroft 	    NFSINT_SIGMASK))
   1379  1.14   mycroft 		return (EINTR);
   1380  1.14   mycroft 	return (0);
   1381  1.14   mycroft }
   1382   1.1       cgd 
   1383   1.1       cgd /*
   1384  1.14   mycroft  * Lock a socket against others.
   1385  1.14   mycroft  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
   1386  1.14   mycroft  * and also to avoid race conditions between the processes with nfs requests
   1387  1.14   mycroft  * in progress when a reconnect is necessary.
   1388   1.1       cgd  */
   1389  1.23  christos int
   1390  1.14   mycroft nfs_sndlock(flagp, rep)
   1391  1.14   mycroft 	register int *flagp;
   1392  1.14   mycroft 	struct nfsreq *rep;
   1393  1.14   mycroft {
   1394  1.14   mycroft 	struct proc *p;
   1395  1.14   mycroft 	int slpflag = 0, slptimeo = 0;
   1396  1.14   mycroft 
   1397  1.14   mycroft 	if (rep) {
   1398  1.14   mycroft 		p = rep->r_procp;
   1399  1.14   mycroft 		if (rep->r_nmp->nm_flag & NFSMNT_INT)
   1400  1.14   mycroft 			slpflag = PCATCH;
   1401  1.14   mycroft 	} else
   1402  1.14   mycroft 		p = (struct proc *)0;
   1403  1.14   mycroft 	while (*flagp & NFSMNT_SNDLOCK) {
   1404  1.14   mycroft 		if (nfs_sigintr(rep->r_nmp, rep, p))
   1405  1.14   mycroft 			return (EINTR);
   1406  1.14   mycroft 		*flagp |= NFSMNT_WANTSND;
   1407  1.14   mycroft 		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
   1408  1.14   mycroft 			slptimeo);
   1409  1.14   mycroft 		if (slpflag == PCATCH) {
   1410  1.14   mycroft 			slpflag = 0;
   1411  1.14   mycroft 			slptimeo = 2 * hz;
   1412  1.14   mycroft 		}
   1413  1.14   mycroft 	}
   1414  1.14   mycroft 	*flagp |= NFSMNT_SNDLOCK;
   1415  1.14   mycroft 	return (0);
   1416  1.14   mycroft }
   1417   1.1       cgd 
   1418  1.14   mycroft /*
   1419  1.14   mycroft  * Unlock the stream socket for others.
                         *
                         * Panics if NFSMNT_SNDLOCK is not set in *flagp.  Clears the lock bit
                         * and, if NFSMNT_WANTSND is set, clears that too and wakes whoever is
                         * sleeping on flagp.
   1420  1.14   mycroft  */
   1421  1.14   mycroft void
   1422  1.14   mycroft nfs_sndunlock(flagp)
   1423  1.14   mycroft 	register int *flagp;
   1424   1.1       cgd {
   1425   1.1       cgd 
   1426  1.14   mycroft 	if ((*flagp & NFSMNT_SNDLOCK) == 0)
   1427  1.14   mycroft 		panic("nfs sndunlock");
   1428  1.14   mycroft 	*flagp &= ~NFSMNT_SNDLOCK;
   1429  1.14   mycroft 	if (*flagp & NFSMNT_WANTSND) {
   1430  1.14   mycroft 		*flagp &= ~NFSMNT_WANTSND;
   1431  1.14   mycroft 		wakeup((caddr_t)flagp);
   1432   1.1       cgd 	}
   1433  1.14   mycroft }
   1434  1.14   mycroft 
                        /*
                         * Take the receive lock (NFSMNT_RCVLOCK) in the nm_flag word of the
                         * mount that rep belongs to, sleeping while it is already set.
                         * Returns 0 once the lock bit has been set, or EINTR if nfs_sigintr()
                         * is nonzero while waiting.  rep must not be null.
                         */
   1435  1.23  christos int
   1436  1.14   mycroft nfs_rcvlock(rep)
   1437  1.14   mycroft 	register struct nfsreq *rep;
   1438  1.14   mycroft {
   1439  1.14   mycroft 	register int *flagp = &rep->r_nmp->nm_flag;
   1440  1.14   mycroft 	int slpflag, slptimeo = 0;
   1441  1.14   mycroft 
   1442  1.14   mycroft 	if (*flagp & NFSMNT_INT)
   1443  1.14   mycroft 		slpflag = PCATCH;
   1444  1.14   mycroft 	else
   1445  1.14   mycroft 		slpflag = 0;
   1446  1.14   mycroft 	while (*flagp & NFSMNT_RCVLOCK) {
   1447  1.14   mycroft 		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
   1448  1.14   mycroft 			return (EINTR);
   1449  1.14   mycroft 		*flagp |= NFSMNT_WANTRCV;
   1450  1.14   mycroft 		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
   1451  1.14   mycroft 			slptimeo);
                        		/*
                        		 * After the first interruptible sleep, stop catching signals
                        		 * in tsleep() and wake every 2 * hz ticks instead; the signal
                        		 * test is repeated at the top of the loop.
                        		 */
   1452  1.14   mycroft 		if (slpflag == PCATCH) {
   1453  1.14   mycroft 			slpflag = 0;
   1454  1.14   mycroft 			slptimeo = 2 * hz;
   1455   1.1       cgd 		}
   1456   1.1       cgd 	}
   1457  1.14   mycroft 	*flagp |= NFSMNT_RCVLOCK;
   1458  1.14   mycroft 	return (0);
   1459  1.14   mycroft }
   1460  1.14   mycroft 
   1461  1.14   mycroft /*
   1462  1.14   mycroft  * Unlock the stream socket for others.
                         *
                         * Receive-side counterpart of nfs_sndunlock(): panics if NFSMNT_RCVLOCK
                         * is not set in *flagp.  Clears the lock bit and, if NFSMNT_WANTRCV is
                         * set, clears that too and wakes whoever is sleeping on flagp.
   1463  1.14   mycroft  */
   1464  1.14   mycroft void
   1465  1.14   mycroft nfs_rcvunlock(flagp)
   1466  1.14   mycroft 	register int *flagp;
   1467  1.14   mycroft {
   1468  1.14   mycroft 
   1469  1.14   mycroft 	if ((*flagp & NFSMNT_RCVLOCK) == 0)
   1470  1.14   mycroft 		panic("nfs rcvunlock");
   1471  1.14   mycroft 	*flagp &= ~NFSMNT_RCVLOCK;
   1472  1.14   mycroft 	if (*flagp & NFSMNT_WANTRCV) {
   1473  1.14   mycroft 		*flagp &= ~NFSMNT_WANTRCV;
   1474  1.14   mycroft 		wakeup((caddr_t)flagp);
   1475  1.14   mycroft 	}
   1476   1.1       cgd }
   1477   1.1       cgd 
   1478  1.14   mycroft /*
   1479  1.14   mycroft  * Check for badly aligned mbuf data areas and
   1480  1.14   mycroft  * realign data in an mbuf list by copying the data areas up, as required.
                         *
                         * The chain is walked until an mbuf is found whose length or data
                         * address is not a multiple of 4.  From that mbuf on, the data of the
                         * rest of the chain is copied into the chain's own buffers, front to
                         * back, and any mbufs left over at the end get m_len = 0.  The function
                         * returns after handling the first such mbuf; nothing is changed if
                         * none is found.
                         * hsiz: when the first rewritten mbuf originally held at most hsiz bytes
                         * and has more than hsiz bytes of room, only hsiz bytes are put in it.
   1481  1.14   mycroft  */
   1482  1.14   mycroft void
   1483  1.14   mycroft nfs_realign(m, hsiz)
   1484  1.14   mycroft 	register struct mbuf *m;
   1485  1.14   mycroft 	int hsiz;
   1486   1.1       cgd {
   1487  1.14   mycroft 	register struct mbuf *m2;
   1488  1.14   mycroft 	register int siz, mlen, olen;
   1489  1.14   mycroft 	register caddr_t tcp, fcp;
   1490  1.14   mycroft 	struct mbuf *mnew;
   1491   1.1       cgd 
   1492  1.14   mycroft 	while (m) {
   1493  1.14   mycroft 	    /*
   1494  1.14   mycroft 	     * This never happens for UDP, rarely happens for TCP
   1495  1.14   mycroft 	     * but frequently happens for iso transport.
   1496  1.14   mycroft 	     */
   1497  1.22       cgd 	    if ((m->m_len & 0x3) || (mtod(m, long) & 0x3)) {
                        		/* olen/fcp: bytes still to copy from the source mbuf, and where. */
   1498  1.14   mycroft 		olen = m->m_len;
   1499  1.14   mycroft 		fcp = mtod(m, caddr_t);
                        		/*
                        		 * A misaligned data pointer is moved: to m_dat, or for M_EXT
                        		 * mbufs to an offset in the external buffer rounded down to a
                        		 * multiple of 4 that still leaves olen bytes of room.
                        		 */
   1500  1.22       cgd 		if ((long)fcp & 0x3) {
   1501  1.14   mycroft 			m->m_flags &= ~M_PKTHDR;
   1502  1.14   mycroft 			if (m->m_flags & M_EXT)
   1503  1.14   mycroft 				m->m_data = m->m_ext.ext_buf +
   1504  1.14   mycroft 					((m->m_ext.ext_size - olen) & ~0x3);
   1505  1.14   mycroft 			else
   1506  1.14   mycroft 				m->m_data = m->m_dat;
   1507  1.14   mycroft 		}
                        		/* mnew/tcp/mlen: destination mbuf, write position, room left. */
   1508  1.14   mycroft 		m->m_len = 0;
   1509  1.14   mycroft 		tcp = mtod(m, caddr_t);
   1510  1.14   mycroft 		mnew = m;
   1511  1.14   mycroft 		m2 = m->m_next;
   1512  1.14   mycroft 
   1513  1.14   mycroft 		/*
   1514  1.14   mycroft 		 * If possible, only put the first invariant part
   1515  1.14   mycroft 		 * of the RPC header in the first mbuf.
   1516  1.14   mycroft 		 */
   1517  1.14   mycroft 		mlen = M_TRAILINGSPACE(m);
   1518  1.14   mycroft 		if (olen <= hsiz && mlen > hsiz)
   1519  1.14   mycroft 			mlen = hsiz;
   1520  1.14   mycroft 
   1521  1.14   mycroft 		/*
   1522  1.14   mycroft 		 * Loop through the mbuf list consolidating data.
   1523  1.14   mycroft 		 */
   1524  1.14   mycroft 		while (m) {
   1525  1.14   mycroft 			while (olen > 0) {
                        				/*
                        				 * Destination full: move on to the next mbuf
                        				 * (m2) and reset it to the start of its buffer.
                        				 * NOTE(review): m2 is dereferenced without a
                        				 * null check; presumably the destination never
                        				 * runs ahead of the source mbuf -- confirm.
                        				 */
   1526  1.14   mycroft 				if (mlen == 0) {
   1527  1.14   mycroft 					m2->m_flags &= ~M_PKTHDR;
   1528  1.14   mycroft 					if (m2->m_flags & M_EXT)
   1529  1.14   mycroft 						m2->m_data = m2->m_ext.ext_buf;
   1530  1.14   mycroft 					else
   1531  1.14   mycroft 						m2->m_data = m2->m_dat;
   1532  1.14   mycroft 					m2->m_len = 0;
   1533  1.14   mycroft 					mlen = M_TRAILINGSPACE(m2);
   1534  1.14   mycroft 					tcp = mtod(m2, caddr_t);
   1535  1.14   mycroft 					mnew = m2;
   1536  1.14   mycroft 					m2 = m2->m_next;
   1537  1.14   mycroft 				}
   1538  1.14   mycroft 				siz = min(mlen, olen);
                        				/* Skip the copy when the data is already in place. */
   1539  1.14   mycroft 				if (tcp != fcp)
   1540  1.14   mycroft 					bcopy(fcp, tcp, siz);
   1541  1.14   mycroft 				mnew->m_len += siz;
   1542  1.14   mycroft 				mlen -= siz;
   1543  1.14   mycroft 				olen -= siz;
   1544  1.14   mycroft 				tcp += siz;
   1545  1.14   mycroft 				fcp += siz;
   1546   1.1       cgd 			}
   1547  1.14   mycroft 			m = m->m_next;
   1548  1.14   mycroft 			if (m) {
   1549  1.14   mycroft 				olen = m->m_len;
   1550  1.14   mycroft 				fcp = mtod(m, caddr_t);
   1551  1.14   mycroft 			}
   1552  1.14   mycroft 		}
   1553  1.14   mycroft 
   1554  1.14   mycroft 		/*
   1555  1.14   mycroft 		 * Finally, set m_len == 0 for any trailing mbufs that have
   1556  1.14   mycroft 		 * been copied out of.
   1557  1.14   mycroft 		 */
   1558  1.14   mycroft 		while (m2) {
   1559  1.14   mycroft 			m2->m_len = 0;
   1560  1.14   mycroft 			m2 = m2->m_next;
   1561   1.1       cgd 		}
   1562  1.14   mycroft 		return;
   1563  1.14   mycroft 	    }
   1564  1.14   mycroft 	    m = m->m_next;
   1565   1.1       cgd 	}
   1566   1.1       cgd }
   1567   1.1       cgd 
   1568   1.1       cgd /*
   1569  1.14   mycroft  * Parse an RPC request
   1570  1.14   mycroft  * - verify it
   1571  1.14   mycroft  * - fill in the cred struct.
                         *
                         * nd: request descriptor; nd_mrep, nd_md and nd_dpos give the mbuf chain
                         *     and the parse position on entry, and nd_retxid, nd_repstat,
                         *     nd_flag, nd_procnum, nd_cr and nd_duration are filled in.
                         * nfsd: receives the Kerberos authenticator (nfsd_authstr), verifier
                         *     (nfsd_verfstr) and the NFSD_NEEDAUTH flag for full-name tickets.
                         * has_header: nonzero if the xid and message-type words precede the
                         *     RPC version word in the data.
                         *
                         * Returns 0 when parsing is finished.  That includes protocol-level
                         * rejections, which are reported through nd_repstat with nd_procnum set
                         * to NFSPROC_NOOP.  Returns EBADRPC, after freeing the chain, for a
                         * message that is not a call or has an out-of-range length field.
                         * The nfsm_* macros presumably jump to nfsmout with error set when the
                         * request is too short -- confirm against their definitions.
   1572   1.1       cgd  */
   1573  1.23  christos int
   1574  1.24      fvdl nfs_getreq(nd, nfsd, has_header)
   1575  1.24      fvdl 	register struct nfsrv_descript *nd;
   1576  1.24      fvdl 	struct nfsd *nfsd;
   1577  1.14   mycroft 	int has_header;
   1578   1.1       cgd {
   1579  1.14   mycroft 	register int len, i;
   1580  1.22       cgd 	register u_int32_t *tl;
   1581  1.22       cgd 	register int32_t t1;
   1582  1.14   mycroft 	struct uio uio;
   1583  1.14   mycroft 	struct iovec iov;
   1584  1.24      fvdl 	caddr_t dpos, cp2, cp;
   1585  1.22       cgd 	u_int32_t nfsvers, auth_type;
   1586  1.24      fvdl 	uid_t nickuid;
   1587  1.24      fvdl 	int error = 0, nqnfs = 0, ticklen;
   1588  1.14   mycroft 	struct mbuf *mrep, *md;
   1589  1.24      fvdl 	register struct nfsuid *nuidp;
   1590  1.24      fvdl 	struct timeval tvin, tvout;
   1591  1.14   mycroft 
   1592  1.14   mycroft 	mrep = nd->nd_mrep;
   1593  1.14   mycroft 	md = nd->nd_md;
   1594  1.14   mycroft 	dpos = nd->nd_dpos;
                        	/*
                        	 * Fixed part of the call: [xid, message type,] rpc version,
                        	 * program, program version, procedure, credential flavor and
                        	 * credential length, plus the two words that follow them.
                        	 */
   1595  1.14   mycroft 	if (has_header) {
   1596  1.24      fvdl 		nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
   1597  1.24      fvdl 		nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
   1598  1.14   mycroft 		if (*tl++ != rpc_call) {
   1599  1.14   mycroft 			m_freem(mrep);
   1600  1.14   mycroft 			return (EBADRPC);
   1601  1.14   mycroft 		}
   1602  1.24      fvdl 	} else
   1603  1.24      fvdl 		nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
   1604  1.14   mycroft 	nd->nd_repstat = 0;
   1605  1.24      fvdl 	nd->nd_flag = 0;
   1606  1.14   mycroft 	if (*tl++ != rpc_vers) {
   1607  1.14   mycroft 		nd->nd_repstat = ERPCMISMATCH;
   1608  1.14   mycroft 		nd->nd_procnum = NFSPROC_NOOP;
   1609  1.14   mycroft 		return (0);
   1610  1.14   mycroft 	}
   1611  1.14   mycroft 	if (*tl != nfs_prog) {
   1612  1.24      fvdl 		if (*tl == nqnfs_prog)
   1613  1.14   mycroft 			nqnfs++;
   1614  1.24      fvdl 		else {
   1615  1.14   mycroft 			nd->nd_repstat = EPROGUNAVAIL;
   1616  1.14   mycroft 			nd->nd_procnum = NFSPROC_NOOP;
   1617  1.14   mycroft 			return (0);
   1618  1.14   mycroft 		}
   1619  1.14   mycroft 	}
   1620  1.14   mycroft 	tl++;
   1621  1.24      fvdl 	nfsvers = fxdr_unsigned(u_int32_t, *tl++);
   1622  1.24      fvdl 	if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
   1623  1.24      fvdl 		(nfsvers != NQNFS_VER3 && nqnfs)) {
   1624  1.14   mycroft 		nd->nd_repstat = EPROGMISMATCH;
   1625  1.14   mycroft 		nd->nd_procnum = NFSPROC_NOOP;
   1626  1.14   mycroft 		return (0);
   1627  1.14   mycroft 	}
   1628  1.24      fvdl 	if (nqnfs)
   1629  1.24      fvdl 		nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
   1630  1.24      fvdl 	else if (nfsvers == NFS_VER3)
   1631  1.24      fvdl 		nd->nd_flag = ND_NFSV3;
   1632  1.24      fvdl 	nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
                        	/* The null procedure returns before any credential is examined. */
   1633  1.14   mycroft 	if (nd->nd_procnum == NFSPROC_NULL)
   1634  1.14   mycroft 		return (0);
   1635  1.14   mycroft 	if (nd->nd_procnum >= NFS_NPROCS ||
   1636  1.24      fvdl 		(!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
   1637  1.24      fvdl 		(!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
   1638  1.14   mycroft 		nd->nd_repstat = EPROCUNAVAIL;
   1639  1.14   mycroft 		nd->nd_procnum = NFSPROC_NOOP;
   1640   1.1       cgd 		return (0);
   1641  1.14   mycroft 	}
                        	/* Requests without ND_NFSV3 have their number mapped via nfsv3_procid[]. */
   1642  1.24      fvdl 	if ((nd->nd_flag & ND_NFSV3) == 0)
   1643  1.24      fvdl 		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
   1644  1.14   mycroft 	auth_type = *tl++;
   1645  1.14   mycroft 	len = fxdr_unsigned(int, *tl++);
   1646  1.14   mycroft 	if (len < 0 || len > RPCAUTH_MAXSIZ) {
   1647  1.14   mycroft 		m_freem(mrep);
   1648  1.14   mycroft 		return (EBADRPC);
   1649  1.14   mycroft 	}
   1650  1.14   mycroft 
   1651  1.24      fvdl 	nd->nd_flag &= ~ND_KERBAUTH;
   1652  1.14   mycroft 	/*
   1653  1.14   mycroft 	 * Handle auth_unix or auth_kerb.
   1654  1.14   mycroft 	 */
   1655  1.14   mycroft 	if (auth_type == rpc_auth_unix) {
                        		/* Skip one word, then read the length of a name that is skipped. */
   1656  1.14   mycroft 		len = fxdr_unsigned(int, *++tl);
   1657  1.14   mycroft 		if (len < 0 || len > NFS_MAXNAMLEN) {
   1658  1.14   mycroft 			m_freem(mrep);
   1659  1.14   mycroft 			return (EBADRPC);
   1660  1.14   mycroft 		}
   1661  1.14   mycroft 		nfsm_adv(nfsm_rndup(len));
   1662  1.24      fvdl 		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
   1663  1.24      fvdl 		bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
   1664  1.24      fvdl 		nd->nd_cr.cr_ref = 1;
   1665  1.14   mycroft 		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
   1666  1.14   mycroft 		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
   1667  1.14   mycroft 		len = fxdr_unsigned(int, *tl);
   1668  1.14   mycroft 		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
   1669  1.14   mycroft 			m_freem(mrep);
   1670  1.14   mycroft 			return (EBADRPC);
   1671  1.14   mycroft 		}
                        		/* len group ids followed by the verifier flavor and length. */
   1672  1.24      fvdl 		nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
                        		/* Groups beyond NGROUPS are read past but not stored. */
   1673  1.18   mycroft 		for (i = 0; i < len; i++)
   1674  1.24      fvdl 		    if (i < NGROUPS)
   1675  1.24      fvdl 			nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
   1676  1.24      fvdl 		    else
   1677  1.24      fvdl 			tl++;
   1678  1.19   mycroft 		nd->nd_cr.cr_ngroups = (len > NGROUPS) ? NGROUPS : len;
   1679  1.24      fvdl 		if (nd->nd_cr.cr_ngroups > 1)
   1680  1.24      fvdl 		    nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
                        		/* The verifier flavor is skipped; its body is skipped by length. */
   1681  1.24      fvdl 		len = fxdr_unsigned(int, *++tl);
   1682  1.24      fvdl 		if (len < 0 || len > RPCAUTH_MAXSIZ) {
   1683  1.14   mycroft 			m_freem(mrep);
   1684  1.14   mycroft 			return (EBADRPC);
   1685  1.14   mycroft 		}
   1686  1.24      fvdl 		if (len > 0)
   1687  1.24      fvdl 			nfsm_adv(nfsm_rndup(len));
   1688  1.24      fvdl 	} else if (auth_type == rpc_auth_kerb) {
                        		/*
                        		 * NOTE(review): this switch has no default case; a name kind
                        		 * other than the two below falls through with neither
                        		 * ND_KERBFULL nor ND_KERBNICK set and the verifier unparsed.
                        		 */
   1689  1.24      fvdl 		switch (fxdr_unsigned(int, *tl++)) {
   1690  1.24      fvdl 		case RPCAKN_FULLNAME:
                        			/*
                        			 * Copy the ticket length word and the ticket into
                        			 * nfsd_authstr.
                        			 * NOTE(review): ticklen is not checked for a negative
                        			 * value before it is used to size the copy -- confirm.
                        			 */
   1691  1.24      fvdl 			ticklen = fxdr_unsigned(int, *tl);
   1692  1.24      fvdl 			*((u_int32_t *)nfsd->nfsd_authstr) = *tl;
   1693  1.24      fvdl 			uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
   1694  1.24      fvdl 			nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
   1695  1.24      fvdl 			if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
   1696  1.24      fvdl 				m_freem(mrep);
   1697  1.24      fvdl 				return (EBADRPC);
   1698  1.24      fvdl 			}
   1699  1.24      fvdl 			uio.uio_offset = 0;
   1700  1.24      fvdl 			uio.uio_iov = &iov;
   1701  1.24      fvdl 			uio.uio_iovcnt = 1;
   1702  1.24      fvdl 			uio.uio_segflg = UIO_SYSSPACE;
   1703  1.24      fvdl 			iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
   1704  1.24      fvdl 			iov.iov_len = RPCAUTH_MAXSIZ - 4;
   1705  1.24      fvdl 			nfsm_mtouio(&uio, uio.uio_resid);
   1706  1.24      fvdl 			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
   1707  1.24      fvdl 			if (*tl++ != rpc_auth_kerb ||
   1708  1.24      fvdl 				fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
   1709  1.31  christos 				printf("Bad kerb verifier\n");
   1710  1.24      fvdl 				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
   1711  1.24      fvdl 				nd->nd_procnum = NFSPROC_NOOP;
   1712  1.24      fvdl 				return (0);
   1713  1.24      fvdl 			}
   1714  1.24      fvdl 			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
   1715  1.24      fvdl 			tl = (u_int32_t *)cp;
   1716  1.24      fvdl 			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
   1717  1.31  christos 				printf("Not fullname kerb verifier\n");
   1718  1.24      fvdl 				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
   1719  1.24      fvdl 				nd->nd_procnum = NFSPROC_NOOP;
   1720  1.24      fvdl 				return (0);
   1721  1.24      fvdl 			}
                        			/* Keep the three verifier words after the name kind. */
   1722  1.24      fvdl 			cp += NFSX_UNSIGNED;
   1723  1.24      fvdl 			bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
   1724  1.24      fvdl 			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
   1725  1.24      fvdl 			nd->nd_flag |= ND_KERBFULL;
   1726  1.24      fvdl 			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
   1727  1.24      fvdl 			break;
   1728  1.24      fvdl 		case RPCAKN_NICKNAME:
   1729  1.24      fvdl 			if (len != 2 * NFSX_UNSIGNED) {
   1730  1.31  christos 				printf("Kerb nickname short\n");
   1731  1.24      fvdl 				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
   1732  1.24      fvdl 				nd->nd_procnum = NFSPROC_NOOP;
   1733  1.24      fvdl 				return (0);
   1734  1.24      fvdl 			}
   1735  1.24      fvdl 			nickuid = fxdr_unsigned(uid_t, *tl);
   1736  1.24      fvdl 			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
   1737  1.24      fvdl 			if (*tl++ != rpc_auth_kerb ||
   1738  1.24      fvdl 				fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
   1739  1.31  christos 				printf("Kerb nick verifier bad\n");
   1740  1.24      fvdl 				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
   1741  1.24      fvdl 				nd->nd_procnum = NFSPROC_NOOP;
   1742  1.24      fvdl 				return (0);
   1743  1.24      fvdl 			}
   1744  1.24      fvdl 			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
   1745  1.24      fvdl 			tvin.tv_sec = *tl++;
   1746  1.24      fvdl 			tvin.tv_usec = *tl;
   1747  1.24      fvdl 
                        			/*
                        			 * Look up the cached credential for this nickname uid,
                        			 * also matching the client address when nd_nam2 is set.
                        			 */
   1748  1.24      fvdl 			for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
   1749  1.24      fvdl 			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
   1750  1.24      fvdl 				if (nuidp->nu_cr.cr_uid == nickuid &&
   1751  1.24      fvdl 				    (!nd->nd_nam2 ||
   1752  1.24      fvdl 				     netaddr_match(NU_NETFAM(nuidp),
   1753  1.24      fvdl 				      &nuidp->nu_haddr, nd->nd_nam2)))
   1754  1.24      fvdl 					break;
   1755  1.24      fvdl 			}
   1756  1.24      fvdl 			if (!nuidp) {
   1757  1.24      fvdl 				nd->nd_repstat =
   1758  1.24      fvdl 					(NFSERR_AUTHERR|AUTH_REJECTCRED);
   1759  1.24      fvdl 				nd->nd_procnum = NFSPROC_NOOP;
   1760  1.24      fvdl 				return (0);
   1761  1.24      fvdl 			}
   1762  1.24      fvdl 
   1763  1.24      fvdl 			/*
   1764  1.24      fvdl 			 * Now, decrypt the timestamp using the session key
   1765  1.24      fvdl 			 * and validate it.
   1766  1.24      fvdl 			 */
   1767  1.24      fvdl #ifdef NFSKERB
   1768  1.24      fvdl 			XXX
   1769  1.24      fvdl #endif
   1770  1.14   mycroft 
                        			/*
                        			 * NOTE(review): nothing visible here assigns tvout (the
                        			 * NFSKERB section above is only a placeholder), so
                        			 * without NFSKERB the two conversions below read an
                        			 * uninitialized struct; tvin is set but not used.
                        			 */
   1771  1.24      fvdl 			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
   1772  1.24      fvdl 			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
                        			/* Reject an expired entry or a timestamp older than the last one. */
   1773  1.24      fvdl 			if (nuidp->nu_expire < time.tv_sec ||
   1774  1.24      fvdl 			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
   1775  1.24      fvdl 			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
   1776  1.24      fvdl 			     nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
   1777  1.24      fvdl 				nuidp->nu_expire = 0;
   1778  1.24      fvdl 				nd->nd_repstat =
   1779  1.24      fvdl 				    (NFSERR_AUTHERR|AUTH_REJECTVERF);
   1780  1.24      fvdl 				nd->nd_procnum = NFSPROC_NOOP;
   1781  1.24      fvdl 				return (0);
   1782  1.24      fvdl 			}
   1783  1.24      fvdl 			nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
   1784  1.24      fvdl 			nd->nd_flag |= ND_KERBNICK;
   1785  1.24      fvdl 		};
   1786  1.24      fvdl 	} else {
   1787  1.24      fvdl 		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
   1788  1.24      fvdl 		nd->nd_procnum = NFSPROC_NOOP;
   1789  1.24      fvdl 		return (0);
   1790  1.14   mycroft 	}
   1791  1.14   mycroft 
   1792  1.14   mycroft 	/*
   1793  1.14   mycroft 	 * For nqnfs, get piggybacked lease request.
   1794  1.14   mycroft 	 */
   1795  1.14   mycroft 	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
   1796  1.22       cgd 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
   1797  1.24      fvdl 		nd->nd_flag |= fxdr_unsigned(int, *tl);
   1798  1.24      fvdl 		if (nd->nd_flag & ND_LEASE) {
   1799  1.22       cgd 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
   1800  1.24      fvdl 			nd->nd_duration = fxdr_unsigned(u_int32_t, *tl);
   1801  1.14   mycroft 		} else
   1802  1.14   mycroft 			nd->nd_duration = NQ_MINLEASE;
   1803  1.24      fvdl 	} else
   1804  1.14   mycroft 		nd->nd_duration = NQ_MINLEASE;
                        	/* Hand the advanced parse position back to the caller. */
   1805  1.14   mycroft 	nd->nd_md = md;
   1806  1.14   mycroft 	nd->nd_dpos = dpos;
   1807  1.14   mycroft 	return (0);
   1808  1.14   mycroft nfsmout:
   1809  1.14   mycroft 	return (error);
   1810   1.1       cgd }
   1811   1.1       cgd 
                        /*
                         * Print "nfs server <server>: <msg>" with tprintf().
                         * The tprintf handle is opened on process p when p is non-null;
                         * otherwise a NULL handle is passed.  Always returns 0.
                         */
   1812  1.24      fvdl int
   1813   1.1       cgd nfs_msg(p, server, msg)
   1814   1.1       cgd 	struct proc *p;
   1815   1.1       cgd 	char *server, *msg;
   1816   1.1       cgd {
   1817   1.1       cgd 	tpr_t tpr;
   1818   1.1       cgd 
   1819   1.1       cgd 	if (p)
   1820   1.1       cgd 		tpr = tprintf_open(p);
   1821   1.1       cgd 	else
   1822   1.1       cgd 		tpr = NULL;
   1823   1.1       cgd 	tprintf(tpr, "nfs server %s: %s\n", server, msg);
   1824   1.1       cgd 	tprintf_close(tpr);
   1825  1.24      fvdl 	return (0);
   1826   1.1       cgd }
   1827   1.1       cgd 
   1828  1.14   mycroft #ifdef NFSSERVER
                        /*
                         * Table of NFS_NPROCS server procedure handlers (26 initializers).
                         * NOTE(review): presumably indexed by nd_procnum as left by
                         * nfs_getreq(), i.e. after requests without ND_NFSV3 have been mapped
                         * through nfsv3_procid[] -- the dispatch site is not in this part of
                         * the file; confirm there.
                         */
   1829  1.24      fvdl int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *,
   1830  1.24      fvdl 				    struct nfssvc_sock *, struct proc *,
   1831  1.23  christos 				    struct mbuf **)) = {
   1832  1.14   mycroft 	nfsrv_null,
   1833  1.14   mycroft 	nfsrv_getattr,
   1834  1.14   mycroft 	nfsrv_setattr,
   1835  1.14   mycroft 	nfsrv_lookup,
   1836  1.24      fvdl 	nfsrv3_access,
   1837  1.14   mycroft 	nfsrv_readlink,
   1838  1.14   mycroft 	nfsrv_read,
   1839  1.14   mycroft 	nfsrv_write,
   1840  1.14   mycroft 	nfsrv_create,
   1841  1.24      fvdl 	nfsrv_mkdir,
   1842  1.24      fvdl 	nfsrv_symlink,
   1843  1.24      fvdl 	nfsrv_mknod,
   1844  1.14   mycroft 	nfsrv_remove,
   1845  1.24      fvdl 	nfsrv_rmdir,
   1846  1.14   mycroft 	nfsrv_rename,
   1847  1.14   mycroft 	nfsrv_link,
   1848  1.14   mycroft 	nfsrv_readdir,
   1849  1.24      fvdl 	nfsrv_readdirplus,
   1850  1.14   mycroft 	nfsrv_statfs,
   1851  1.24      fvdl 	nfsrv_fsinfo,
   1852  1.24      fvdl 	nfsrv_pathconf,
   1853  1.24      fvdl 	nfsrv_commit,
   1854  1.14   mycroft 	nqnfsrv_getlease,
   1855  1.14   mycroft 	nqnfsrv_vacated,
   1856  1.14   mycroft 	nfsrv_noop,
   1857  1.24      fvdl 	nfsrv_noop
   1858  1.14   mycroft };
   1859  1.14   mycroft 
   1860   1.1       cgd /*
   1861  1.14   mycroft  * Socket upcall routine for the nfsd sockets.
   1862  1.14   mycroft  * The caddr_t arg is a pointer to the "struct nfssvc_sock".
   1863  1.14   mycroft  * Essentially do as much as possible non-blocking, else punt and it will
   1864  1.14   mycroft  * be called with M_WAIT from an nfsd.
   1865   1.1       cgd  */
   1866  1.14   mycroft void
   1867  1.14   mycroft nfsrv_rcv(so, arg, waitflag)
   1868  1.14   mycroft 	struct socket *so;
   1869  1.14   mycroft 	caddr_t arg;
   1870  1.14   mycroft 	int waitflag;
   1871   1.1       cgd {
   1872  1.14   mycroft 	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
   1873  1.14   mycroft 	register struct mbuf *m;
   1874  1.14   mycroft 	struct mbuf *mp, *nam;
   1875  1.14   mycroft 	struct uio auio;
   1876  1.14   mycroft 	int flags, error;
   1877   1.1       cgd 
   1878  1.14   mycroft 	if ((slp->ns_flag & SLP_VALID) == 0)
   1879  1.14   mycroft 		return;
   1880  1.14   mycroft #ifdef notdef
   1881  1.14   mycroft 	/*
   1882  1.14   mycroft 	 * Define this to test for nfsds handling this under heavy load.
   1883  1.14   mycroft 	 */
   1884  1.14   mycroft 	if (waitflag == M_DONTWAIT) {
   1885  1.14   mycroft 		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
   1886   1.1       cgd 	}
   1887  1.14   mycroft #endif
   1888  1.14   mycroft 	auio.uio_procp = NULL;
   1889  1.14   mycroft 	if (so->so_type == SOCK_STREAM) {
   1890  1.14   mycroft 		/*
   1891  1.14   mycroft 		 * If there are already records on the queue, defer soreceive()
   1892  1.14   mycroft 		 * to an nfsd so that there is feedback to the TCP layer that
   1893  1.14   mycroft 		 * the nfs servers are heavily loaded.
   1894  1.14   mycroft 		 */
   1895  1.14   mycroft 		if (slp->ns_rec && waitflag == M_DONTWAIT) {
   1896  1.14   mycroft 			slp->ns_flag |= SLP_NEEDQ;
   1897  1.14   mycroft 			goto dorecs;
   1898  1.14   mycroft 		}
   1899  1.14   mycroft 
   1900  1.14   mycroft 		/*
   1901  1.14   mycroft 		 * Do soreceive().
   1902  1.14   mycroft 		 */
   1903  1.14   mycroft 		auio.uio_resid = 1000000000;
   1904  1.14   mycroft 		flags = MSG_DONTWAIT;
   1905  1.14   mycroft 		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
   1906  1.14   mycroft 		if (error || mp == (struct mbuf *)0) {
   1907  1.14   mycroft 			if (error == EWOULDBLOCK)
   1908  1.14   mycroft 				slp->ns_flag |= SLP_NEEDQ;
   1909  1.14   mycroft 			else
   1910  1.14   mycroft 				slp->ns_flag |= SLP_DISCONN;
   1911  1.14   mycroft 			goto dorecs;
   1912  1.14   mycroft 		}
   1913  1.14   mycroft 		m = mp;
   1914  1.14   mycroft 		if (slp->ns_rawend) {
   1915  1.14   mycroft 			slp->ns_rawend->m_next = m;
   1916  1.14   mycroft 			slp->ns_cc += 1000000000 - auio.uio_resid;
   1917  1.14   mycroft 		} else {
   1918  1.14   mycroft 			slp->ns_raw = m;
   1919  1.14   mycroft 			slp->ns_cc = 1000000000 - auio.uio_resid;
   1920  1.14   mycroft 		}
   1921  1.14   mycroft 		while (m->m_next)
   1922  1.14   mycroft 			m = m->m_next;
   1923  1.14   mycroft 		slp->ns_rawend = m;
   1924  1.14   mycroft 
   1925  1.14   mycroft 		/*
   1926  1.14   mycroft 		 * Now try and parse record(s) out of the raw stream data.
   1927  1.14   mycroft 		 */
   1928  1.24      fvdl 		error = nfsrv_getstream(slp, waitflag);
   1929  1.24      fvdl 		if (error) {
   1930  1.14   mycroft 			if (error == EPERM)
   1931  1.14   mycroft 				slp->ns_flag |= SLP_DISCONN;
   1932  1.14   mycroft 			else
   1933  1.14   mycroft 				slp->ns_flag |= SLP_NEEDQ;
   1934  1.14   mycroft 		}
   1935  1.14   mycroft 	} else {
   1936  1.14   mycroft 		do {
   1937  1.14   mycroft 			auio.uio_resid = 1000000000;
   1938  1.14   mycroft 			flags = MSG_DONTWAIT;
   1939  1.14   mycroft 			error = soreceive(so, &nam, &auio, &mp,
   1940  1.14   mycroft 						(struct mbuf **)0, &flags);
   1941  1.14   mycroft 			if (mp) {
   1942  1.14   mycroft 				nfs_realign(mp, 10 * NFSX_UNSIGNED);
   1943  1.14   mycroft 				if (nam) {
   1944  1.14   mycroft 					m = nam;
   1945  1.14   mycroft 					m->m_next = mp;
   1946  1.14   mycroft 				} else
   1947  1.14   mycroft 					m = mp;
   1948  1.14   mycroft 				if (slp->ns_recend)
   1949  1.14   mycroft 					slp->ns_recend->m_nextpkt = m;
   1950  1.14   mycroft 				else
   1951  1.14   mycroft 					slp->ns_rec = m;
   1952  1.14   mycroft 				slp->ns_recend = m;
   1953  1.14   mycroft 				m->m_nextpkt = (struct mbuf *)0;
   1954  1.14   mycroft 			}
   1955  1.14   mycroft 			if (error) {
   1956  1.14   mycroft 				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
   1957  1.14   mycroft 					&& error != EWOULDBLOCK) {
   1958  1.14   mycroft 					slp->ns_flag |= SLP_DISCONN;
   1959  1.14   mycroft 					goto dorecs;
   1960  1.14   mycroft 				}
   1961  1.14   mycroft 			}
   1962  1.14   mycroft 		} while (mp);
   1963  1.14   mycroft 	}
   1964  1.14   mycroft 
   1965  1.14   mycroft 	/*
   1966  1.14   mycroft 	 * Now try and process the request records, non-blocking.
   1967  1.14   mycroft 	 */
   1968  1.14   mycroft dorecs:
   1969  1.14   mycroft 	if (waitflag == M_DONTWAIT &&
   1970  1.14   mycroft 		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
   1971  1.14   mycroft 		nfsrv_wakenfsd(slp);
   1972   1.1       cgd }
   1973   1.1       cgd 
   1974   1.1       cgd /*
   1975  1.14   mycroft  * Try and extract an RPC request from the mbuf data list received on a
   1976  1.14   mycroft  * stream socket. The "waitflag" argument indicates whether or not it
   1977  1.14   mycroft  * can sleep.
   1978  1.14   mycroft  */
   1979  1.23  christos int
   1980  1.14   mycroft nfsrv_getstream(slp, waitflag)
   1981  1.14   mycroft 	register struct nfssvc_sock *slp;
   1982  1.14   mycroft 	int waitflag;
   1983   1.1       cgd {
   1984  1.24      fvdl 	register struct mbuf *m, **mpp;
   1985  1.14   mycroft 	register char *cp1, *cp2;
   1986  1.14   mycroft 	register int len;
   1987  1.23  christos 	struct mbuf *om, *m2, *recm = NULL;
   1988  1.24      fvdl 	u_int32_t recmark;
   1989   1.1       cgd 
   1990  1.14   mycroft 	if (slp->ns_flag & SLP_GETSTREAM)
   1991  1.14   mycroft 		panic("nfs getstream");
   1992  1.14   mycroft 	slp->ns_flag |= SLP_GETSTREAM;
   1993  1.14   mycroft 	for (;;) {
   1994  1.14   mycroft 	    if (slp->ns_reclen == 0) {
   1995  1.14   mycroft 		if (slp->ns_cc < NFSX_UNSIGNED) {
   1996  1.14   mycroft 			slp->ns_flag &= ~SLP_GETSTREAM;
   1997  1.14   mycroft 			return (0);
   1998  1.14   mycroft 		}
   1999  1.14   mycroft 		m = slp->ns_raw;
   2000  1.14   mycroft 		if (m->m_len >= NFSX_UNSIGNED) {
   2001  1.14   mycroft 			bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
   2002  1.14   mycroft 			m->m_data += NFSX_UNSIGNED;
   2003  1.14   mycroft 			m->m_len -= NFSX_UNSIGNED;
   2004  1.14   mycroft 		} else {
   2005  1.14   mycroft 			cp1 = (caddr_t)&recmark;
   2006  1.14   mycroft 			cp2 = mtod(m, caddr_t);
   2007  1.14   mycroft 			while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
   2008  1.14   mycroft 				while (m->m_len == 0) {
   2009  1.14   mycroft 					m = m->m_next;
   2010  1.14   mycroft 					cp2 = mtod(m, caddr_t);
   2011  1.14   mycroft 				}
   2012  1.14   mycroft 				*cp1++ = *cp2++;
   2013  1.14   mycroft 				m->m_data++;
   2014  1.14   mycroft 				m->m_len--;
   2015  1.14   mycroft 			}
   2016  1.14   mycroft 		}
   2017  1.14   mycroft 		slp->ns_cc -= NFSX_UNSIGNED;
   2018  1.24      fvdl 		recmark = ntohl(recmark);
   2019  1.24      fvdl 		slp->ns_reclen = recmark & ~0x80000000;
   2020  1.24      fvdl 		if (recmark & 0x80000000)
   2021  1.24      fvdl 			slp->ns_flag |= SLP_LASTFRAG;
   2022  1.24      fvdl 		else
   2023  1.24      fvdl 			slp->ns_flag &= ~SLP_LASTFRAG;
   2024  1.29      fvdl 		if (slp->ns_reclen > NFS_MAXPACKET) {
   2025  1.14   mycroft 			slp->ns_flag &= ~SLP_GETSTREAM;
   2026  1.14   mycroft 			return (EPERM);
   2027  1.14   mycroft 		}
   2028  1.14   mycroft 	    }
   2029  1.14   mycroft 
   2030  1.14   mycroft 	    /*
   2031  1.14   mycroft 	     * Now get the record part.
   2032  1.14   mycroft 	     */
   2033  1.14   mycroft 	    if (slp->ns_cc == slp->ns_reclen) {
   2034  1.14   mycroft 		recm = slp->ns_raw;
   2035  1.14   mycroft 		slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
   2036  1.14   mycroft 		slp->ns_cc = slp->ns_reclen = 0;
   2037  1.14   mycroft 	    } else if (slp->ns_cc > slp->ns_reclen) {
   2038  1.14   mycroft 		len = 0;
   2039  1.14   mycroft 		m = slp->ns_raw;
   2040  1.14   mycroft 		om = (struct mbuf *)0;
   2041  1.14   mycroft 		while (len < slp->ns_reclen) {
   2042  1.14   mycroft 			if ((len + m->m_len) > slp->ns_reclen) {
   2043  1.14   mycroft 				m2 = m_copym(m, 0, slp->ns_reclen - len,
   2044  1.14   mycroft 					waitflag);
   2045  1.14   mycroft 				if (m2) {
   2046  1.14   mycroft 					if (om) {
   2047  1.14   mycroft 						om->m_next = m2;
   2048  1.14   mycroft 						recm = slp->ns_raw;
   2049  1.14   mycroft 					} else
   2050  1.14   mycroft 						recm = m2;
   2051  1.14   mycroft 					m->m_data += slp->ns_reclen - len;
   2052  1.14   mycroft 					m->m_len -= slp->ns_reclen - len;
   2053  1.14   mycroft 					len = slp->ns_reclen;
   2054  1.14   mycroft 				} else {
   2055  1.14   mycroft 					slp->ns_flag &= ~SLP_GETSTREAM;
   2056  1.14   mycroft 					return (EWOULDBLOCK);
   2057  1.14   mycroft 				}
   2058  1.14   mycroft 			} else if ((len + m->m_len) == slp->ns_reclen) {
   2059  1.14   mycroft 				om = m;
   2060  1.14   mycroft 				len += m->m_len;
   2061  1.14   mycroft 				m = m->m_next;
   2062  1.14   mycroft 				recm = slp->ns_raw;
   2063  1.14   mycroft 				om->m_next = (struct mbuf *)0;
   2064  1.14   mycroft 			} else {
   2065  1.14   mycroft 				om = m;
   2066  1.14   mycroft 				len += m->m_len;
   2067  1.14   mycroft 				m = m->m_next;
   2068  1.14   mycroft 			}
   2069  1.14   mycroft 		}
   2070  1.14   mycroft 		slp->ns_raw = m;
   2071  1.14   mycroft 		slp->ns_cc -= len;
   2072  1.14   mycroft 		slp->ns_reclen = 0;
   2073  1.14   mycroft 	    } else {
   2074  1.14   mycroft 		slp->ns_flag &= ~SLP_GETSTREAM;
   2075  1.14   mycroft 		return (0);
   2076  1.14   mycroft 	    }
   2077  1.24      fvdl 
   2078  1.24      fvdl 	    /*
   2079  1.24      fvdl 	     * Accumulate the fragments into a record.
   2080  1.24      fvdl 	     */
   2081  1.24      fvdl 	    mpp = &slp->ns_frag;
   2082  1.24      fvdl 	    while (*mpp)
   2083  1.24      fvdl 		mpp = &((*mpp)->m_next);
   2084  1.24      fvdl 	    *mpp = recm;
   2085  1.24      fvdl 	    if (slp->ns_flag & SLP_LASTFRAG) {
   2086  1.24      fvdl 		nfs_realign(slp->ns_frag, 10 * NFSX_UNSIGNED);
   2087  1.24      fvdl 		if (slp->ns_recend)
   2088  1.24      fvdl 		    slp->ns_recend->m_nextpkt = slp->ns_frag;
   2089  1.24      fvdl 		else
   2090  1.24      fvdl 		    slp->ns_rec = slp->ns_frag;
   2091  1.24      fvdl 		slp->ns_recend = slp->ns_frag;
   2092  1.24      fvdl 		slp->ns_frag = (struct mbuf *)0;
   2093  1.24      fvdl 	    }
   2094   1.1       cgd 	}
   2095   1.1       cgd }
   2096   1.1       cgd 
   2097   1.1       cgd /*
   2098  1.14   mycroft  * Parse an RPC header.
   2099  1.14   mycroft  */
   2100  1.23  christos int
   2101  1.24      fvdl nfsrv_dorec(slp, nfsd, ndp)
   2102  1.14   mycroft 	register struct nfssvc_sock *slp;
   2103  1.24      fvdl 	struct nfsd *nfsd;
   2104  1.24      fvdl 	struct nfsrv_descript **ndp;
   2105  1.14   mycroft {
   2106  1.24      fvdl 	register struct mbuf *m, *nam;
   2107  1.24      fvdl 	register struct nfsrv_descript *nd;
   2108  1.14   mycroft 	int error;
   2109   1.1       cgd 
   2110  1.24      fvdl 	*ndp = NULL;
   2111  1.14   mycroft 	if ((slp->ns_flag & SLP_VALID) == 0 ||
   2112  1.14   mycroft 	    (m = slp->ns_rec) == (struct mbuf *)0)
   2113  1.14   mycroft 		return (ENOBUFS);
   2114  1.24      fvdl 	slp->ns_rec = m->m_nextpkt;
   2115  1.24      fvdl 	if (slp->ns_rec)
   2116  1.14   mycroft 		m->m_nextpkt = (struct mbuf *)0;
   2117  1.14   mycroft 	else
   2118  1.14   mycroft 		slp->ns_recend = (struct mbuf *)0;
   2119  1.14   mycroft 	if (m->m_type == MT_SONAME) {
   2120  1.24      fvdl 		nam = m;
   2121  1.24      fvdl 		m = m->m_next;
   2122  1.24      fvdl 		nam->m_next = NULL;
   2123  1.24      fvdl 	} else
   2124  1.24      fvdl 		nam = NULL;
   2125  1.24      fvdl 	MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
   2126  1.24      fvdl 		M_NFSRVDESC, M_WAITOK);
   2127  1.24      fvdl 	nd->nd_md = nd->nd_mrep = m;
   2128  1.24      fvdl 	nd->nd_nam2 = nam;
   2129  1.24      fvdl 	nd->nd_dpos = mtod(m, caddr_t);
   2130  1.24      fvdl 	error = nfs_getreq(nd, nfsd, TRUE);
   2131  1.24      fvdl 	if (error) {
   2132  1.24      fvdl 		m_freem(nam);
   2133  1.24      fvdl 		free((caddr_t)nd, M_NFSRVDESC);
   2134  1.14   mycroft 		return (error);
   2135  1.14   mycroft 	}
   2136  1.24      fvdl 	*ndp = nd;
   2137  1.24      fvdl 	nfsd->nfsd_nd = nd;
   2138   1.1       cgd 	return (0);
   2139   1.1       cgd }
   2140   1.1       cgd 
   2141  1.24      fvdl 
   2142   1.1       cgd /*
   2143  1.14   mycroft  * Search for a sleeping nfsd and wake it up.
   2144  1.14   mycroft  * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
   2145  1.14   mycroft  * running nfsds will go look for the work in the nfssvc_sock list.
   2146  1.14   mycroft  */
   2147  1.14   mycroft void
   2148  1.14   mycroft nfsrv_wakenfsd(slp)
   2149  1.14   mycroft 	struct nfssvc_sock *slp;
   2150  1.14   mycroft {
   2151  1.17   mycroft 	register struct nfsd *nd;
   2152  1.14   mycroft 
   2153  1.14   mycroft 	if ((slp->ns_flag & SLP_VALID) == 0)
   2154  1.14   mycroft 		return;
   2155  1.24      fvdl 	for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
   2156  1.24      fvdl 		if (nd->nfsd_flag & NFSD_WAITING) {
   2157  1.24      fvdl 			nd->nfsd_flag &= ~NFSD_WAITING;
   2158  1.24      fvdl 			if (nd->nfsd_slp)
   2159  1.14   mycroft 				panic("nfsd wakeup");
   2160  1.14   mycroft 			slp->ns_sref++;
   2161  1.24      fvdl 			nd->nfsd_slp = slp;
   2162  1.14   mycroft 			wakeup((caddr_t)nd);
   2163  1.14   mycroft 			return;
   2164  1.14   mycroft 		}
   2165  1.14   mycroft 	}
   2166  1.14   mycroft 	slp->ns_flag |= SLP_DOREC;
   2167  1.17   mycroft 	nfsd_head_flag |= NFSD_CHECKSLP;
   2168   1.1       cgd }
   2169  1.14   mycroft #endif /* NFSSERVER */
   2170