nfs_socket.c revision 1.102.2.3.2.1 1 1.102.2.3.2.1 jmc /* $NetBSD: nfs_socket.c,v 1.102.2.3.2.1 2005/01/11 06:39:04 jmc Exp $ */
2 1.15 cgd
3 1.1 cgd /*
4 1.24 fvdl * Copyright (c) 1989, 1991, 1993, 1995
5 1.14 mycroft * The Regents of the University of California. All rights reserved.
6 1.1 cgd *
7 1.1 cgd * This code is derived from software contributed to Berkeley by
8 1.1 cgd * Rick Macklem at The University of Guelph.
9 1.1 cgd *
10 1.1 cgd * Redistribution and use in source and binary forms, with or without
11 1.1 cgd * modification, are permitted provided that the following conditions
12 1.1 cgd * are met:
13 1.1 cgd * 1. Redistributions of source code must retain the above copyright
14 1.1 cgd * notice, this list of conditions and the following disclaimer.
15 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 cgd * notice, this list of conditions and the following disclaimer in the
17 1.1 cgd * documentation and/or other materials provided with the distribution.
18 1.96 agc * 3. Neither the name of the University nor the names of its contributors
19 1.1 cgd * may be used to endorse or promote products derived from this software
20 1.1 cgd * without specific prior written permission.
21 1.1 cgd *
22 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 1.1 cgd * SUCH DAMAGE.
33 1.1 cgd *
34 1.24 fvdl * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
35 1.1 cgd */
36 1.1 cgd
37 1.1 cgd /*
38 1.1 cgd * Socket operations for use by nfs
39 1.1 cgd */
40 1.69 lukem
41 1.69 lukem #include <sys/cdefs.h>
42 1.102.2.3.2.1 jmc __KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.102.2.3.2.1 2005/01/11 06:39:04 jmc Exp $");
43 1.42 thorpej
44 1.42 thorpej #include "fs_nfs.h"
45 1.59 bjh21 #include "opt_nfs.h"
46 1.44 thorpej #include "opt_nfsserver.h"
47 1.89 martin #include "opt_mbuftrace.h"
48 1.57 fvdl #include "opt_inet.h"
49 1.1 cgd
50 1.9 mycroft #include <sys/param.h>
51 1.9 mycroft #include <sys/systm.h>
52 1.54 thorpej #include <sys/callout.h>
53 1.9 mycroft #include <sys/proc.h>
54 1.9 mycroft #include <sys/mount.h>
55 1.9 mycroft #include <sys/kernel.h>
56 1.9 mycroft #include <sys/mbuf.h>
57 1.9 mycroft #include <sys/vnode.h>
58 1.9 mycroft #include <sys/domain.h>
59 1.9 mycroft #include <sys/protosw.h>
60 1.9 mycroft #include <sys/socket.h>
61 1.9 mycroft #include <sys/socketvar.h>
62 1.9 mycroft #include <sys/syslog.h>
63 1.9 mycroft #include <sys/tprintf.h>
64 1.23 christos #include <sys/namei.h>
65 1.47 mycroft #include <sys/signal.h>
66 1.47 mycroft #include <sys/signalvar.h>
67 1.1 cgd
68 1.9 mycroft #include <netinet/in.h>
69 1.9 mycroft #include <netinet/tcp.h>
70 1.24 fvdl
71 1.9 mycroft #include <nfs/rpcv2.h>
72 1.24 fvdl #include <nfs/nfsproto.h>
73 1.9 mycroft #include <nfs/nfs.h>
74 1.9 mycroft #include <nfs/xdr_subs.h>
75 1.9 mycroft #include <nfs/nfsm_subs.h>
76 1.9 mycroft #include <nfs/nfsmount.h>
77 1.14 mycroft #include <nfs/nfsnode.h>
78 1.14 mycroft #include <nfs/nfsrtt.h>
79 1.14 mycroft #include <nfs/nqnfs.h>
80 1.23 christos #include <nfs/nfs_var.h>
81 1.78 thorpej
/* Malloc type described by its own strings as holding NFS request headers. */
82 1.78 thorpej MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header");
83 1.79 matt #ifdef MBUFTRACE
/*
 * mbuf owner tag named "nfs"; nfs_connect() installs it on the socket and on
 * both socket buffers.  Only compiled when MBUFTRACE is defined.
 */
84 1.79 matt struct mowner nfs_mowner = { "nfs" };
85 1.79 matt #endif
86 1.1 cgd
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 *	getattr, lookup - A+2D
 *	read, write     - A+4D
 *	other           - nm_timeo
 *
 * NFS_RTO(n, t):
 *	n - pointer to the mount structure holding nm_timeo, nm_srtt[] and
 *	    nm_sdrtt[]
 *	t - timer class as stored in proct[]: 0 selects nm_timeo, 1 and 2 use
 *	    the first formula, anything larger uses the second
 * NFS_SRTT(r) / NFS_SDRTT(r):
 *	lvalues naming the smoothed rtt / rtt deviation slot that belongs to
 *	the procedure of request r.  The slot index is proct[] - 1, so they
 *	must not be used for a procedure whose timer class is 0.
 *
 * Every macro argument is parenthesized, so expressions such as
 * "cond ? 2 : 3" may be passed for t.  Arguments are evaluated more than
 * once; do not pass expressions with side effects.
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + \
	    (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + \
	    (n)->nm_sdrtt[(t) - 1] + 1)))
#define	NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
#define	NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
106 1.14 mycroft /*
107 1.1 cgd * External data, mostly RPC constants in XDR form
108 1.1 cgd */
109 1.22 cgd extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
110 1.24 fvdl rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
111 1.14 mycroft rpc_auth_kerb;
112 1.24 fvdl extern u_int32_t nfs_prog, nqnfs_prog;
113 1.14 mycroft extern time_t nqnfsstarttime;
114 1.77 matt extern const int nfsv3_procid[NFS_NPROCS];
115 1.24 fvdl extern int nfs_ticks;
116 1.14 mycroft
117 1.14 mycroft /*
118 1.14 mycroft * Defines which timer to use for the procnum.
119 1.14 mycroft * 0 - default
120 1.14 mycroft * 1 - getattr
121 1.14 mycroft * 2 - lookup
122 1.14 mycroft * 3 - read
123 1.14 mycroft * 4 - write
124 1.14 mycroft */
125 1.66 jdolecek static const int proct[NFS_NPROCS] = {
126 1.24 fvdl 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
127 1.24 fvdl 0, 0, 0,
128 1.1 cgd };
129 1.14 mycroft
130 1.14 mycroft /*
131 1.14 mycroft * There is a congestion window for outstanding rpcs maintained per mount
132 1.14 mycroft * point. The cwnd size is adjusted in roughly the way that:
133 1.14 mycroft * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
134 1.14 mycroft * SIGCOMM '88". ACM, August 1988.
135 1.14 mycroft * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
136 1.14 mycroft * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
137 1.14 mycroft * of rpcs is in progress.
138 1.14 mycroft * (The sent count and cwnd are scaled for integer arith.)
139 1.14 mycroft * Variants of "slow start" were tried and were found to be too much of a
140 1.14 mycroft * performance hit (ave. rtt 3 times larger),
141 1.14 mycroft * I suspect due to the large rtt that nfs rpcs have.
142 1.14 mycroft */
/* Scale factor for the integer arithmetic on the sent count and cwnd. */
143 1.14 mycroft #define NFS_CWNDSCALE 256
/*
 * 32 scaled units; nfs_connect() starts nm_cwnd at half of this.
 * NOTE(review): presumably the upper bound for nm_cwnd -- the code that
 * grows the window is outside this section.
 */
144 1.14 mycroft #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
/*
 * Powers of two from 2 to 256.
 * NOTE(review): presumably retransmit backoff multipliers -- no user of this
 * table is visible in this section.
 */
145 1.66 jdolecek static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
/* Tested by nfs_reply() before it touches a struct rttl; off by default. */
146 1.14 mycroft int nfsrtton = 0;
147 1.14 mycroft struct nfsrtt nfsrtt;
/* Queue of requests walked by nfs_reconnect() and nfs_reply() via r_chain. */
148 1.74 matt struct nfsreqhead nfs_reqq;
149 1.1 cgd
/* Callout statically bound to nfs_timer() with a NULL argument. */
150 1.99 yamt struct callout nfs_timer_ch = CALLOUT_INITIALIZER_SETFUNC(nfs_timer, NULL);
151 1.54 thorpej
152 1.1 cgd /*
153 1.1 cgd * Initialize sockets and congestion for a new NFS connection.
154 1.1 cgd * We do not free the sockaddr if error.
155 1.1 cgd */
156 1.23 christos int
157 1.14 mycroft nfs_connect(nmp, rep)
158 1.55 augustss struct nfsmount *nmp;
159 1.14 mycroft struct nfsreq *rep;
160 1.1 cgd {
161 1.55 augustss struct socket *so;
162 1.14 mycroft int s, error, rcvreserve, sndreserve;
163 1.11 cgd struct sockaddr *saddr;
164 1.14 mycroft struct sockaddr_in *sin;
165 1.57 fvdl #ifdef INET6
166 1.57 fvdl struct sockaddr_in6 *sin6;
167 1.57 fvdl #endif
168 1.1 cgd struct mbuf *m;
169 1.1 cgd
170 1.1 cgd nmp->nm_so = (struct socket *)0;
171 1.11 cgd saddr = mtod(nmp->nm_nam, struct sockaddr *);
172 1.24 fvdl error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
173 1.24 fvdl nmp->nm_soproto);
174 1.23 christos if (error)
175 1.1 cgd goto bad;
176 1.1 cgd so = nmp->nm_so;
177 1.79 matt #ifdef MBUFTRACE
178 1.79 matt so->so_mowner = &nfs_mowner;
179 1.79 matt so->so_rcv.sb_mowner = &nfs_mowner;
180 1.79 matt so->so_snd.sb_mowner = &nfs_mowner;
181 1.79 matt #endif
182 1.1 cgd nmp->nm_soflags = so->so_proto->pr_flags;
183 1.1 cgd
184 1.2 cgd /*
185 1.2 cgd * Some servers require that the client port be a reserved port number.
186 1.2 cgd */
187 1.14 mycroft if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
188 1.79 matt m = m_get(M_WAIT, MT_SOOPTS);
189 1.79 matt MCLAIM(m, so->so_mowner);
190 1.72 lukem *mtod(m, int32_t *) = IP_PORTRANGE_LOW;
191 1.72 lukem m->m_len = sizeof(int32_t);
192 1.72 lukem if ((error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, m)))
193 1.72 lukem goto bad;
194 1.79 matt m = m_get(M_WAIT, MT_SONAME);
195 1.79 matt MCLAIM(m, so->so_mowner);
196 1.2 cgd sin = mtod(m, struct sockaddr_in *);
197 1.2 cgd sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
198 1.2 cgd sin->sin_family = AF_INET;
199 1.2 cgd sin->sin_addr.s_addr = INADDR_ANY;
200 1.72 lukem sin->sin_port = 0;
201 1.92 fvdl error = sobind(so, m, &proc0);
202 1.2 cgd m_freem(m);
203 1.14 mycroft if (error)
204 1.14 mycroft goto bad;
205 1.2 cgd }
206 1.57 fvdl #ifdef INET6
207 1.57 fvdl if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) {
208 1.79 matt m = m_get(M_WAIT, MT_SOOPTS);
209 1.79 matt MCLAIM(m, so->so_mowner);
210 1.72 lukem *mtod(m, int32_t *) = IPV6_PORTRANGE_LOW;
211 1.72 lukem m->m_len = sizeof(int32_t);
212 1.72 lukem if ((error = sosetopt(so, IPPROTO_IPV6, IPV6_PORTRANGE, m)))
213 1.72 lukem goto bad;
214 1.79 matt m = m_get(M_WAIT, MT_SONAME);
215 1.79 matt MCLAIM(m, so->so_mowner);
216 1.57 fvdl sin6 = mtod(m, struct sockaddr_in6 *);
217 1.57 fvdl sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6);
218 1.57 fvdl sin6->sin6_family = AF_INET6;
219 1.57 fvdl sin6->sin6_addr = in6addr_any;
220 1.72 lukem sin6->sin6_port = 0;
221 1.92 fvdl error = sobind(so, m, &proc0);
222 1.57 fvdl m_freem(m);
223 1.57 fvdl if (error)
224 1.57 fvdl goto bad;
225 1.57 fvdl }
226 1.57 fvdl #endif
227 1.2 cgd
228 1.1 cgd /*
229 1.1 cgd * Protocols that do not require connections may be optionally left
230 1.1 cgd * unconnected for servers that reply from a port other than NFS_PORT.
231 1.1 cgd */
232 1.1 cgd if (nmp->nm_flag & NFSMNT_NOCONN) {
233 1.1 cgd if (nmp->nm_soflags & PR_CONNREQUIRED) {
234 1.1 cgd error = ENOTCONN;
235 1.1 cgd goto bad;
236 1.1 cgd }
237 1.1 cgd } else {
238 1.24 fvdl error = soconnect(so, nmp->nm_nam);
239 1.24 fvdl if (error)
240 1.1 cgd goto bad;
241 1.1 cgd
242 1.1 cgd /*
243 1.1 cgd * Wait for the connection to complete. Cribbed from the
244 1.14 mycroft * connect system call but with the wait timing out so
245 1.14 mycroft * that interruptible mounts don't hang here for a long time.
246 1.1 cgd */
247 1.21 mycroft s = splsoftnet();
248 1.14 mycroft while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
249 1.14 mycroft (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
250 1.48 fvdl "nfscn1", 2 * hz);
251 1.14 mycroft if ((so->so_state & SS_ISCONNECTING) &&
252 1.14 mycroft so->so_error == 0 && rep &&
253 1.92 fvdl (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
254 1.14 mycroft so->so_state &= ~SS_ISCONNECTING;
255 1.14 mycroft splx(s);
256 1.14 mycroft goto bad;
257 1.14 mycroft }
258 1.14 mycroft }
259 1.1 cgd if (so->so_error) {
260 1.1 cgd error = so->so_error;
261 1.14 mycroft so->so_error = 0;
262 1.14 mycroft splx(s);
263 1.1 cgd goto bad;
264 1.1 cgd }
265 1.14 mycroft splx(s);
266 1.14 mycroft }
267 1.14 mycroft if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
268 1.14 mycroft so->so_rcv.sb_timeo = (5 * hz);
269 1.14 mycroft so->so_snd.sb_timeo = (5 * hz);
270 1.14 mycroft } else {
271 1.102.2.1 tron /*
272 1.102.2.1 tron * enable receive timeout to detect server crash and reconnect.
273 1.102.2.1 tron * otherwise, we can be stuck in soreceive forever.
274 1.102.2.1 tron */
275 1.102.2.1 tron so->so_rcv.sb_timeo = (5 * hz);
276 1.14 mycroft so->so_snd.sb_timeo = 0;
277 1.1 cgd }
278 1.1 cgd if (nmp->nm_sotype == SOCK_DGRAM) {
279 1.37 fvdl sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
280 1.37 fvdl rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
281 1.37 fvdl NFS_MAXPKTHDR) * 2;
282 1.14 mycroft } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
283 1.26 fvdl sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
284 1.26 fvdl rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
285 1.25 fvdl NFS_MAXPKTHDR) * 2;
286 1.1 cgd } else {
287 1.14 mycroft if (nmp->nm_sotype != SOCK_STREAM)
288 1.14 mycroft panic("nfscon sotype");
289 1.1 cgd if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
290 1.79 matt m = m_get(M_WAIT, MT_SOOPTS);
291 1.79 matt MCLAIM(m, so->so_mowner);
292 1.22 cgd *mtod(m, int32_t *) = 1;
293 1.22 cgd m->m_len = sizeof(int32_t);
294 1.1 cgd sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
295 1.1 cgd }
296 1.14 mycroft if (so->so_proto->pr_protocol == IPPROTO_TCP) {
297 1.79 matt m = m_get(M_WAIT, MT_SOOPTS);
298 1.79 matt MCLAIM(m, so->so_mowner);
299 1.22 cgd *mtod(m, int32_t *) = 1;
300 1.22 cgd m->m_len = sizeof(int32_t);
301 1.1 cgd sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
302 1.1 cgd }
303 1.22 cgd sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
304 1.22 cgd sizeof (u_int32_t)) * 2;
305 1.22 cgd rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
306 1.22 cgd sizeof (u_int32_t)) * 2;
307 1.1 cgd }
308 1.24 fvdl error = soreserve(so, sndreserve, rcvreserve);
309 1.24 fvdl if (error)
310 1.14 mycroft goto bad;
311 1.1 cgd so->so_rcv.sb_flags |= SB_NOINTR;
312 1.1 cgd so->so_snd.sb_flags |= SB_NOINTR;
313 1.1 cgd
314 1.1 cgd /* Initialize other non-zero congestion variables */
315 1.14 mycroft nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
316 1.68 simonb NFS_TIMEO << 3;
317 1.14 mycroft nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
318 1.68 simonb nmp->nm_sdrtt[3] = 0;
319 1.14 mycroft nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
320 1.1 cgd nmp->nm_sent = 0;
321 1.14 mycroft nmp->nm_timeouts = 0;
322 1.1 cgd return (0);
323 1.1 cgd
324 1.1 cgd bad:
325 1.1 cgd nfs_disconnect(nmp);
326 1.1 cgd return (error);
327 1.1 cgd }
328 1.1 cgd
329 1.1 cgd /*
330 1.1 cgd * Reconnect routine:
331 1.1 cgd * Called when a connection is broken on a reliable protocol.
332 1.1 cgd * - clean up the old socket
333 1.1 cgd * - nfs_connect() again
334 1.1 cgd * - set R_MUSTRESEND for all outstanding requests on mount point
335 1.1 cgd * If this fails the mount point is DEAD!
336 1.14 mycroft * nb: Must be called with the nfs_sndlock() set on the mount point.
337 1.1 cgd */
338 1.23 christos int
339 1.14 mycroft nfs_reconnect(rep)
340 1.55 augustss struct nfsreq *rep;
341 1.1 cgd {
342 1.55 augustss struct nfsreq *rp;
343 1.55 augustss struct nfsmount *nmp = rep->r_nmp;
344 1.1 cgd int error;
345 1.1 cgd
346 1.14 mycroft nfs_disconnect(nmp);
347 1.23 christos while ((error = nfs_connect(nmp, rep)) != 0) {
348 1.14 mycroft if (error == EINTR || error == ERESTART)
349 1.1 cgd return (EINTR);
350 1.48 fvdl (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscn2", 0);
351 1.1 cgd }
352 1.1 cgd
353 1.1 cgd /*
354 1.1 cgd * Loop through outstanding request list and fix up all requests
355 1.1 cgd * on old socket.
356 1.1 cgd */
357 1.73 christos TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
358 1.102.2.1 tron if (rp->r_nmp == nmp) {
359 1.102.2.3.2.1 jmc if ((rp->r_flags & R_MUSTRESEND) == 0)
360 1.102.2.3.2.1 jmc rp->r_flags |= R_MUSTRESEND | R_REXMITTED;
361 1.102.2.1 tron rp->r_rexmit = 0;
362 1.102.2.1 tron }
363 1.1 cgd }
364 1.1 cgd return (0);
365 1.1 cgd }
366 1.1 cgd
367 1.1 cgd /*
368 1.1 cgd * NFS disconnect. Clean up and unlink.
369 1.1 cgd */
370 1.1 cgd void
371 1.1 cgd nfs_disconnect(nmp)
372 1.55 augustss struct nfsmount *nmp;
373 1.1 cgd {
374 1.55 augustss struct socket *so;
375 1.53 sommerfe int drain = 0;
376 1.53 sommerfe
377 1.1 cgd if (nmp->nm_so) {
378 1.1 cgd so = nmp->nm_so;
379 1.1 cgd nmp->nm_so = (struct socket *)0;
380 1.1 cgd soshutdown(so, 2);
381 1.53 sommerfe drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0;
382 1.53 sommerfe if (drain) {
383 1.51 sommerfe /*
384 1.51 sommerfe * soshutdown() above should wake up the current
385 1.51 sommerfe * listener.
386 1.71 minoura * Now wake up those waiting for the receive lock, and
387 1.51 sommerfe * wait for them to go away unhappy, to prevent *nmp
388 1.51 sommerfe * from evaporating while they're sleeping.
389 1.51 sommerfe */
390 1.51 sommerfe while (nmp->nm_waiters > 0) {
391 1.51 sommerfe wakeup (&nmp->nm_iflag);
392 1.56 thorpej (void) tsleep(&nmp->nm_waiters, PVFS,
393 1.56 thorpej "nfsdis", 0);
394 1.51 sommerfe }
395 1.51 sommerfe }
396 1.1 cgd soclose(so);
397 1.41 fvdl }
398 1.51 sommerfe #ifdef DIAGNOSTIC
399 1.53 sommerfe if (drain && (nmp->nm_waiters > 0))
400 1.76 provos panic("nfs_disconnect: waiters left after drain?");
401 1.51 sommerfe #endif
402 1.41 fvdl }
403 1.40 fvdl
404 1.41 fvdl void
405 1.41 fvdl nfs_safedisconnect(nmp)
406 1.41 fvdl struct nfsmount *nmp;
407 1.41 fvdl {
408 1.41 fvdl struct nfsreq dummyreq;
409 1.41 fvdl
410 1.46 perry memset(&dummyreq, 0, sizeof(dummyreq));
411 1.41 fvdl dummyreq.r_nmp = nmp;
412 1.51 sommerfe nfs_rcvlock(&dummyreq); /* XXX ignored error return */
413 1.41 fvdl nfs_disconnect(nmp);
414 1.87 yamt nfs_rcvunlock(nmp);
415 1.1 cgd }
416 1.1 cgd
417 1.1 cgd /*
418 1.1 cgd * This is the nfs send routine. For connection based socket types, it
419 1.14 mycroft * must be called with an nfs_sndlock() on the socket.
420 1.1 cgd * "rep == NULL" indicates that it has been called from a server.
421 1.14 mycroft * For the client side:
422 1.14 mycroft * - return EINTR if the RPC is terminated, 0 otherwise
423 1.14 mycroft * - set R_MUSTRESEND if the send fails for any reason
424 1.58 mrg * - do any cleanup required by recoverable socket errors (? ? ?)
425 1.14 mycroft * For the server side:
426 1.14 mycroft * - return EINTR or ERESTART if interrupted by a signal
427 1.14 mycroft * - return EPIPE if a connection is lost for connection based sockets (TCP...)
428 1.58 mrg * - do any cleanup required by recoverable socket errors (? ? ?)
429 1.1 cgd */
430 1.23 christos int
431 1.1 cgd nfs_send(so, nam, top, rep)
432 1.55 augustss struct socket *so;
433 1.1 cgd struct mbuf *nam;
434 1.55 augustss struct mbuf *top;
435 1.1 cgd struct nfsreq *rep;
436 1.1 cgd {
437 1.1 cgd struct mbuf *sendnam;
438 1.14 mycroft int error, soflags, flags;
439 1.1 cgd
440 1.1 cgd if (rep) {
441 1.1 cgd if (rep->r_flags & R_SOFTTERM) {
442 1.1 cgd m_freem(top);
443 1.1 cgd return (EINTR);
444 1.1 cgd }
445 1.14 mycroft if ((so = rep->r_nmp->nm_so) == NULL) {
446 1.14 mycroft rep->r_flags |= R_MUSTRESEND;
447 1.14 mycroft m_freem(top);
448 1.14 mycroft return (0);
449 1.14 mycroft }
450 1.1 cgd rep->r_flags &= ~R_MUSTRESEND;
451 1.1 cgd soflags = rep->r_nmp->nm_soflags;
452 1.1 cgd } else
453 1.1 cgd soflags = so->so_proto->pr_flags;
454 1.1 cgd if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
455 1.1 cgd sendnam = (struct mbuf *)0;
456 1.1 cgd else
457 1.1 cgd sendnam = nam;
458 1.14 mycroft if (so->so_type == SOCK_SEQPACKET)
459 1.14 mycroft flags = MSG_EOR;
460 1.14 mycroft else
461 1.14 mycroft flags = 0;
462 1.1 cgd
463 1.43 matt error = (*so->so_send)(so, sendnam, (struct uio *)0, top,
464 1.14 mycroft (struct mbuf *)0, flags);
465 1.14 mycroft if (error) {
466 1.14 mycroft if (rep) {
467 1.60 fvdl if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
468 1.60 fvdl /*
469 1.60 fvdl * We're too fast for the network/driver,
470 1.60 fvdl * and UDP isn't flowcontrolled.
471 1.60 fvdl * We need to resend. This is not fatal,
472 1.60 fvdl * just try again.
473 1.60 fvdl *
474 1.60 fvdl * Could be smarter here by doing some sort
475 1.60 fvdl * of a backoff, but this is rare.
476 1.60 fvdl */
477 1.14 mycroft rep->r_flags |= R_MUSTRESEND;
478 1.60 fvdl } else {
479 1.101 matt if (error != EPIPE)
480 1.101 matt log(LOG_INFO,
481 1.101 matt "nfs send error %d for %s\n",
482 1.101 matt error,
483 1.101 matt rep->r_nmp->nm_mountp->
484 1.101 matt mnt_stat.f_mntfromname);
485 1.60 fvdl /*
486 1.60 fvdl * Deal with errors for the client side.
487 1.60 fvdl */
488 1.60 fvdl if (rep->r_flags & R_SOFTTERM)
489 1.60 fvdl error = EINTR;
490 1.60 fvdl else
491 1.60 fvdl rep->r_flags |= R_MUSTRESEND;
492 1.60 fvdl }
493 1.67 fvdl } else {
494 1.67 fvdl /*
495 1.67 fvdl * See above. This error can happen under normal
496 1.67 fvdl * circumstances and the log is too noisy.
497 1.67 fvdl * The error will still show up in nfsstat.
498 1.67 fvdl */
499 1.67 fvdl if (error != ENOBUFS || so->so_type != SOCK_DGRAM)
500 1.67 fvdl log(LOG_INFO, "nfsd send error %d\n", error);
501 1.67 fvdl }
502 1.14 mycroft
503 1.14 mycroft /*
504 1.58 mrg * Handle any recoverable (soft) socket errors here. (? ? ?)
505 1.14 mycroft */
506 1.14 mycroft if (error != EINTR && error != ERESTART &&
507 1.14 mycroft error != EWOULDBLOCK && error != EPIPE)
508 1.1 cgd error = 0;
509 1.1 cgd }
510 1.1 cgd return (error);
511 1.1 cgd }
512 1.1 cgd
513 1.32 thorpej #ifdef NFS
514 1.1 cgd /*
515 1.1 cgd * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
516 1.1 cgd * done by soreceive(), but for SOCK_STREAM we must deal with the Record
517 1.1 cgd * Mark and consolidate the data into a new mbuf list.
518 1.1 cgd * nb: Sometimes TCP passes the data up to soreceive() in long lists of
519 1.1 cgd * small mbufs.
520 1.1 cgd * For SOCK_STREAM we must be very careful to read an entire record once
521 1.1 cgd * we have read any of it, even if the system call has been interrupted.
522 1.1 cgd */
523 1.23 christos int
524 1.14 mycroft nfs_receive(rep, aname, mp)
525 1.55 augustss struct nfsreq *rep;
526 1.1 cgd struct mbuf **aname;
527 1.1 cgd struct mbuf **mp;
528 1.1 cgd {
529 1.55 augustss struct socket *so;
530 1.1 cgd struct uio auio;
531 1.1 cgd struct iovec aio;
532 1.55 augustss struct mbuf *m;
533 1.14 mycroft struct mbuf *control;
534 1.22 cgd u_int32_t len;
535 1.1 cgd struct mbuf **getnam;
536 1.14 mycroft int error, sotype, rcvflg;
537 1.92 fvdl struct proc *p = curproc; /* XXX */
538 1.1 cgd
539 1.1 cgd /*
540 1.1 cgd * Set up arguments for soreceive()
541 1.1 cgd */
542 1.1 cgd *mp = (struct mbuf *)0;
543 1.1 cgd *aname = (struct mbuf *)0;
544 1.14 mycroft sotype = rep->r_nmp->nm_sotype;
545 1.1 cgd
546 1.1 cgd /*
547 1.1 cgd * For reliable protocols, lock against other senders/receivers
548 1.1 cgd * in case a reconnect is necessary.
549 1.1 cgd * For SOCK_STREAM, first get the Record Mark to find out how much
550 1.1 cgd * more there is to get.
551 1.1 cgd * We must lock the socket against other receivers
552 1.1 cgd * until we have an entire rpc request/reply.
553 1.1 cgd */
554 1.14 mycroft if (sotype != SOCK_DGRAM) {
555 1.39 fvdl error = nfs_sndlock(&rep->r_nmp->nm_iflag, rep);
556 1.24 fvdl if (error)
557 1.14 mycroft return (error);
558 1.1 cgd tryagain:
559 1.1 cgd /*
560 1.1 cgd * Check for fatal errors and resending request.
561 1.1 cgd */
562 1.14 mycroft /*
563 1.14 mycroft * Ugh: If a reconnect attempt just happened, nm_so
564 1.14 mycroft * would have changed. NULL indicates a failed
565 1.14 mycroft * attempt that has essentially shut down this
566 1.14 mycroft * mount point.
567 1.14 mycroft */
568 1.14 mycroft if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
569 1.39 fvdl nfs_sndunlock(&rep->r_nmp->nm_iflag);
570 1.14 mycroft return (EINTR);
571 1.14 mycroft }
572 1.24 fvdl so = rep->r_nmp->nm_so;
573 1.24 fvdl if (!so) {
574 1.24 fvdl error = nfs_reconnect(rep);
575 1.24 fvdl if (error) {
576 1.39 fvdl nfs_sndunlock(&rep->r_nmp->nm_iflag);
577 1.14 mycroft return (error);
578 1.14 mycroft }
579 1.14 mycroft goto tryagain;
580 1.14 mycroft }
581 1.14 mycroft while (rep->r_flags & R_MUSTRESEND) {
582 1.14 mycroft m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
583 1.14 mycroft nfsstats.rpcretries++;
584 1.102.2.1 tron rep->r_rtt = 0;
585 1.102.2.1 tron rep->r_flags &= ~R_TIMING;
586 1.23 christos error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
587 1.23 christos if (error) {
588 1.14 mycroft if (error == EINTR || error == ERESTART ||
589 1.24 fvdl (error = nfs_reconnect(rep)) != 0) {
590 1.39 fvdl nfs_sndunlock(&rep->r_nmp->nm_iflag);
591 1.14 mycroft return (error);
592 1.14 mycroft }
593 1.14 mycroft goto tryagain;
594 1.1 cgd }
595 1.1 cgd }
596 1.39 fvdl nfs_sndunlock(&rep->r_nmp->nm_iflag);
597 1.14 mycroft if (sotype == SOCK_STREAM) {
598 1.1 cgd aio.iov_base = (caddr_t) &len;
599 1.22 cgd aio.iov_len = sizeof(u_int32_t);
600 1.1 cgd auio.uio_iov = &aio;
601 1.1 cgd auio.uio_iovcnt = 1;
602 1.1 cgd auio.uio_segflg = UIO_SYSSPACE;
603 1.1 cgd auio.uio_rw = UIO_READ;
604 1.1 cgd auio.uio_offset = 0;
605 1.22 cgd auio.uio_resid = sizeof(u_int32_t);
606 1.92 fvdl auio.uio_procp = p;
607 1.1 cgd do {
608 1.14 mycroft rcvflg = MSG_WAITALL;
609 1.43 matt error = (*so->so_receive)(so, (struct mbuf **)0, &auio,
610 1.1 cgd (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
611 1.14 mycroft if (error == EWOULDBLOCK && rep) {
612 1.1 cgd if (rep->r_flags & R_SOFTTERM)
613 1.1 cgd return (EINTR);
614 1.102.2.1 tron /*
615 1.102.2.1 tron * if it seems that the server died after it
616 1.102.2.1 tron * received our request, set EPIPE so that
617 1.102.2.1 tron * we'll reconnect and retransmit requests.
618 1.102.2.1 tron */
619 1.102.2.1 tron if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
620 1.102.2.1 tron nfsstats.rpctimeouts++;
621 1.102.2.1 tron error = EPIPE;
622 1.102.2.1 tron }
623 1.14 mycroft }
624 1.1 cgd } while (error == EWOULDBLOCK);
625 1.1 cgd if (!error && auio.uio_resid > 0) {
626 1.41 fvdl /*
627 1.41 fvdl * Don't log a 0 byte receive; it means
628 1.41 fvdl * that the socket has been closed, and
629 1.41 fvdl * can happen during normal operation
630 1.41 fvdl * (forcible unmount or Solaris server).
631 1.41 fvdl */
632 1.41 fvdl if (auio.uio_resid != sizeof (u_int32_t))
633 1.41 fvdl log(LOG_INFO,
634 1.49 thorpej "short receive (%lu/%lu) from nfs server %s\n",
635 1.49 thorpej (u_long)sizeof(u_int32_t) - auio.uio_resid,
636 1.49 thorpej (u_long)sizeof(u_int32_t),
637 1.1 cgd rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
638 1.1 cgd error = EPIPE;
639 1.1 cgd }
640 1.1 cgd if (error)
641 1.1 cgd goto errout;
642 1.1 cgd len = ntohl(len) & ~0x80000000;
643 1.1 cgd /*
644 1.1 cgd * This is SERIOUS! We are out of sync with the sender
645 1.1 cgd * and forcing a disconnect/reconnect is all I can do.
646 1.1 cgd */
647 1.1 cgd if (len > NFS_MAXPACKET) {
648 1.14 mycroft log(LOG_ERR, "%s (%d) from nfs server %s\n",
649 1.14 mycroft "impossible packet length",
650 1.14 mycroft len,
651 1.14 mycroft rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
652 1.1 cgd error = EFBIG;
653 1.1 cgd goto errout;
654 1.1 cgd }
655 1.1 cgd auio.uio_resid = len;
656 1.1 cgd do {
657 1.1 cgd rcvflg = MSG_WAITALL;
658 1.43 matt error = (*so->so_receive)(so, (struct mbuf **)0,
659 1.1 cgd &auio, mp, (struct mbuf **)0, &rcvflg);
660 1.1 cgd } while (error == EWOULDBLOCK || error == EINTR ||
661 1.1 cgd error == ERESTART);
662 1.1 cgd if (!error && auio.uio_resid > 0) {
663 1.41 fvdl if (len != auio.uio_resid)
664 1.41 fvdl log(LOG_INFO,
665 1.49 thorpej "short receive (%lu/%d) from nfs server %s\n",
666 1.49 thorpej (u_long)len - auio.uio_resid, len,
667 1.14 mycroft rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
668 1.1 cgd error = EPIPE;
669 1.1 cgd }
670 1.1 cgd } else {
671 1.14 mycroft /*
672 1.14 mycroft * NB: Since uio_resid is big, MSG_WAITALL is ignored
673 1.14 mycroft * and soreceive() will return when it has either a
674 1.14 mycroft * control msg or a data msg.
675 1.14 mycroft * We have no use for control msg., but must grab them
676 1.14 mycroft * and then throw them away so we know what is going
677 1.14 mycroft * on.
678 1.14 mycroft */
679 1.14 mycroft auio.uio_resid = len = 100000000; /* Anything Big */
680 1.92 fvdl auio.uio_procp = p;
681 1.1 cgd do {
682 1.1 cgd rcvflg = 0;
683 1.43 matt error = (*so->so_receive)(so, (struct mbuf **)0,
684 1.14 mycroft &auio, mp, &control, &rcvflg);
685 1.14 mycroft if (control)
686 1.14 mycroft m_freem(control);
687 1.1 cgd if (error == EWOULDBLOCK && rep) {
688 1.1 cgd if (rep->r_flags & R_SOFTTERM)
689 1.1 cgd return (EINTR);
690 1.1 cgd }
691 1.14 mycroft } while (error == EWOULDBLOCK ||
692 1.14 mycroft (!error && *mp == NULL && control));
693 1.14 mycroft if ((rcvflg & MSG_EOR) == 0)
694 1.31 christos printf("Egad!!\n");
695 1.1 cgd if (!error && *mp == NULL)
696 1.1 cgd error = EPIPE;
697 1.1 cgd len -= auio.uio_resid;
698 1.1 cgd }
699 1.1 cgd errout:
700 1.14 mycroft if (error && error != EINTR && error != ERESTART) {
701 1.1 cgd m_freem(*mp);
702 1.1 cgd *mp = (struct mbuf *)0;
703 1.14 mycroft if (error != EPIPE)
704 1.1 cgd log(LOG_INFO,
705 1.1 cgd "receive error %d from nfs server %s\n",
706 1.1 cgd error,
707 1.1 cgd rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
708 1.39 fvdl error = nfs_sndlock(&rep->r_nmp->nm_iflag, rep);
709 1.14 mycroft if (!error)
710 1.14 mycroft error = nfs_reconnect(rep);
711 1.1 cgd if (!error)
712 1.1 cgd goto tryagain;
713 1.37 fvdl else
714 1.39 fvdl nfs_sndunlock(&rep->r_nmp->nm_iflag);
715 1.1 cgd }
716 1.1 cgd } else {
717 1.14 mycroft if ((so = rep->r_nmp->nm_so) == NULL)
718 1.14 mycroft return (EACCES);
719 1.1 cgd if (so->so_state & SS_ISCONNECTED)
720 1.1 cgd getnam = (struct mbuf **)0;
721 1.1 cgd else
722 1.1 cgd getnam = aname;
723 1.1 cgd auio.uio_resid = len = 1000000;
724 1.92 fvdl auio.uio_procp = p;
725 1.1 cgd do {
726 1.1 cgd rcvflg = 0;
727 1.43 matt error = (*so->so_receive)(so, getnam, &auio, mp,
728 1.1 cgd (struct mbuf **)0, &rcvflg);
729 1.14 mycroft if (error == EWOULDBLOCK &&
730 1.1 cgd (rep->r_flags & R_SOFTTERM))
731 1.1 cgd return (EINTR);
732 1.1 cgd } while (error == EWOULDBLOCK);
733 1.1 cgd len -= auio.uio_resid;
734 1.51 sommerfe if (!error && *mp == NULL)
735 1.51 sommerfe error = EPIPE;
736 1.1 cgd }
737 1.1 cgd if (error) {
738 1.1 cgd m_freem(*mp);
739 1.1 cgd *mp = (struct mbuf *)0;
740 1.1 cgd }
741 1.1 cgd return (error);
742 1.1 cgd }
743 1.1 cgd
744 1.1 cgd /*
745 1.1 cgd * Implement receipt of reply on a socket.
746 1.1 cgd * We must search through the list of received datagrams matching them
747 1.1 cgd * with outstanding requests using the xid, until ours is found.
748 1.1 cgd */
/*
 * nfs_reply: receive loop run on behalf of the request `myrep'.
 *
 * Each iteration takes the receive lock with nfs_rcvlock(), pulls one RPC
 * message off the socket with nfs_receive() and matches it by xid against
 * the requests queued on nfs_reqq.  The matching request (which need not be
 * `myrep') gets the message attached via r_mrep/r_md/r_dpos, and the mount's
 * congestion window and round-trip estimators are updated.
 *
 * Returns 0 when
 *  - nfs_rcvlock() reports EALREADY,
 *  - the message just processed matched `myrep', or
 *  - R_GETONEREP is set in myrep->r_flags and one message has been handled
 *    (or one ignorable socket error has been seen).
 * Otherwise returns the error from nfs_rcvlock() or nfs_receive().
 */
749 1.1 cgd /* ARGSUSED */
750 1.23 christos int
751 1.14 mycroft nfs_reply(myrep)
752 1.1 cgd struct nfsreq *myrep;
753 1.1 cgd {
754 1.55 augustss struct nfsreq *rep;
755 1.55 augustss struct nfsmount *nmp = myrep->r_nmp;
756 1.55 augustss int32_t t1;
757 1.14 mycroft struct mbuf *mrep, *nam, *md;
758 1.22 cgd u_int32_t rxid, *tl;
759 1.14 mycroft caddr_t dpos, cp2;
760 1.14 mycroft int error;
761 1.1 cgd
762 1.1 cgd /*
763 1.1 cgd * Loop around until we get our own reply
764 1.1 cgd */
765 1.1 cgd for (;;) {
766 1.1 cgd /*
767 1.1 cgd * Lock against other receivers so that I don't get stuck in
768 1.1 cgd * sbwait() after someone else has received my reply for me.
769 1.1 cgd * Also necessary for connection based protocols to avoid
770 1.1 cgd * race conditions during a reconnect.
771 1.1 cgd */
772 1.24 fvdl error = nfs_rcvlock(myrep);
773 1.36 fvdl if (error == EALREADY)
774 1.36 fvdl return (0);
775 1.24 fvdl if (error)
776 1.14 mycroft return (error);
777 1.1 cgd /*
778 1.1 cgd * Get the next Rpc reply off the socket
779 1.1 cgd */
	/*
	 * nm_waiters is raised for the duration of the receive and dropped
	 * again on every path below; on the dismount path a wakeup() is
	 * issued on its address.
	 */
780 1.51 sommerfe nmp->nm_waiters++;
781 1.14 mycroft error = nfs_receive(myrep, &nam, &mrep);
782 1.87 yamt nfs_rcvunlock(nmp);
783 1.14 mycroft if (error) {
784 1.1 cgd
785 1.51 sommerfe if (nmp->nm_iflag & NFSMNT_DISMNT) {
786 1.51 sommerfe /*
787 1.51 sommerfe * Oops, we're going away now..
788 1.51 sommerfe */
789 1.51 sommerfe nmp->nm_waiters--;
790 1.51 sommerfe wakeup (&nmp->nm_waiters);
791 1.51 sommerfe return error;
792 1.51 sommerfe }
793 1.51 sommerfe nmp->nm_waiters--;
794 1.1 cgd /*
795 1.58 mrg * Ignore routing errors on connectionless protocols??
796 1.1 cgd */
797 1.1 cgd if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
798 1.1 cgd nmp->nm_so->so_error = 0;
799 1.37 fvdl #ifdef DEBUG
800 1.33 fvdl printf("nfs_reply: ignoring error %d\n", error);
801 1.37 fvdl #endif
802 1.14 mycroft if (myrep->r_flags & R_GETONEREP)
803 1.14 mycroft return (0);
804 1.1 cgd continue;
805 1.1 cgd }
806 1.1 cgd return (error);
807 1.1 cgd }
808 1.51 sommerfe nmp->nm_waiters--;
	/* The sender's address is not used here; just release it. */
809 1.14 mycroft if (nam)
810 1.14 mycroft m_freem(nam);
811 1.40 fvdl
812 1.1 cgd /*
813 1.1 cgd * Get the xid and check that it is an rpc reply
814 1.1 cgd */
815 1.14 mycroft md = mrep;
816 1.14 mycroft dpos = mtod(md, caddr_t);
817 1.22 cgd nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
818 1.14 mycroft rxid = *tl++;
819 1.14 mycroft if (*tl != rpc_reply) {
820 1.61 bjh21 #ifndef NFS_V2_ONLY
821 1.14 mycroft if (nmp->nm_flag & NFSMNT_NQNFS) {
822 1.92 fvdl if (nqnfs_callback(nmp, mrep, md, dpos))
823 1.14 mycroft nfsstats.rpcinvalid++;
824 1.61 bjh21 } else
825 1.61 bjh21 #endif
826 1.61 bjh21 {
827 1.14 mycroft nfsstats.rpcinvalid++;
828 1.14 mycroft m_freem(mrep);
829 1.14 mycroft }
	/*
	 * NOTE(review): nfsmout is presumably the label the nfsm_dissect()
	 * macro jumps to on a short or malformed message; the macro is
	 * defined outside this view - confirm.
	 */
830 1.14 mycroft nfsmout:
831 1.14 mycroft if (myrep->r_flags & R_GETONEREP)
832 1.14 mycroft return (0);
833 1.1 cgd continue;
834 1.1 cgd }
835 1.14 mycroft
836 1.1 cgd /*
837 1.1 cgd * Loop through the request list to match up the reply
838 1.1 cgd * Iff no match, just drop the datagram
839 1.1 cgd */
840 1.73 christos TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
841 1.1 cgd if (rep->r_mrep == NULL && rxid == rep->r_xid) {
842 1.1 cgd /* Found it.. */
843 1.14 mycroft rep->r_mrep = mrep;
844 1.14 mycroft rep->r_md = md;
845 1.14 mycroft rep->r_dpos = dpos;
	/* Optional round-trip logging into the circular nfsrtt.rttl[] buffer. */
846 1.14 mycroft if (nfsrtton) {
847 1.14 mycroft struct rttl *rt;
848 1.14 mycroft
849 1.14 mycroft rt = &nfsrtt.rttl[nfsrtt.pos];
850 1.14 mycroft rt->proc = rep->r_procnum;
851 1.14 mycroft rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
852 1.14 mycroft rt->sent = nmp->nm_sent;
853 1.14 mycroft rt->cwnd = nmp->nm_cwnd;
854 1.14 mycroft rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
855 1.14 mycroft rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
856 1.14 mycroft rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
857 1.14 mycroft rt->tstamp = time;
858 1.14 mycroft if (rep->r_flags & R_TIMING)
859 1.14 mycroft rt->rtt = rep->r_rtt;
860 1.14 mycroft else
861 1.14 mycroft rt->rtt = 1000000;
862 1.14 mycroft nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
863 1.14 mycroft }
864 1.1 cgd /*
865 1.14 mycroft * Update congestion window.
866 1.14 mycroft * Do the additive increase of
867 1.14 mycroft * one rpc/rtt.
868 1.14 mycroft */
869 1.14 mycroft if (nmp->nm_cwnd <= nmp->nm_sent) {
870 1.14 mycroft nmp->nm_cwnd +=
871 1.14 mycroft (NFS_CWNDSCALE * NFS_CWNDSCALE +
872 1.14 mycroft (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
873 1.14 mycroft if (nmp->nm_cwnd > NFS_MAXCWND)
874 1.14 mycroft nmp->nm_cwnd = NFS_MAXCWND;
875 1.14 mycroft }
	/*
	 * R_SENT is cleared and nm_sent decremented unconditionally here,
	 * i.e. without first testing whether R_SENT was set.
	 */
876 1.14 mycroft rep->r_flags &= ~R_SENT;
877 1.14 mycroft nmp->nm_sent -= NFS_CWNDSCALE;
878 1.14 mycroft /*
879 1.14 mycroft * Update rtt using a gain of 0.125 on the mean
880 1.14 mycroft * and a gain of 0.25 on the deviation.
881 1.1 cgd */
882 1.1 cgd if (rep->r_flags & R_TIMING) {
883 1.14 mycroft /*
884 1.14 mycroft * Since the timer resolution of
885 1.14 mycroft * NFS_HZ is so coarse, it can often
886 1.14 mycroft * result in r_rtt == 0. Since
887 1.14 mycroft * r_rtt == N means that the actual
888 1.14 mycroft * rtt is between N+dt and N+2-dt ticks,
889 1.14 mycroft * add 1.
890 1.14 mycroft */
891 1.14 mycroft t1 = rep->r_rtt + 1;
892 1.14 mycroft t1 -= (NFS_SRTT(rep) >> 3);
893 1.14 mycroft NFS_SRTT(rep) += t1;
894 1.14 mycroft if (t1 < 0)
895 1.14 mycroft t1 = -t1;
896 1.14 mycroft t1 -= (NFS_SDRTT(rep) >> 2);
897 1.14 mycroft NFS_SDRTT(rep) += t1;
898 1.1 cgd }
899 1.14 mycroft nmp->nm_timeouts = 0;
900 1.1 cgd break;
901 1.1 cgd }
902 1.1 cgd }
903 1.1 cgd /*
904 1.1 cgd * If not matched to a request, drop it.
905 1.1 cgd * If it's mine, get out.
906 1.1 cgd */
	/* rep is null here only when the list walk above ran off the end. */
907 1.16 mycroft if (rep == 0) {
908 1.1 cgd nfsstats.rpcunexpected++;
909 1.14 mycroft m_freem(mrep);
910 1.14 mycroft } else if (rep == myrep) {
911 1.14 mycroft if (rep->r_mrep == NULL)
912 1.14 mycroft panic("nfsreply nil");
913 1.14 mycroft return (0);
914 1.14 mycroft }
915 1.14 mycroft if (myrep->r_flags & R_GETONEREP)
916 1.1 cgd return (0);
917 1.1 cgd }
918 1.1 cgd }
919 1.1 cgd
920 1.1 cgd /*
921 1.1 cgd * nfs_request - goes something like this
922 1.1 cgd * - fill in request struct
923 1.1 cgd * - links it into list
924 1.1 cgd * - calls nfs_send() for first transmit
925 1.1 cgd * - calls nfs_reply() to get reply
926 1.1 cgd * - break down rpc header and return with nfs reply pointed to
927 1.1 cgd * by mrep or error
928 1.1 cgd * nb: always frees up mreq mbuf list
929 1.1 cgd */
/*
 * nfs_request: perform one client-side NFS RPC and hand back the reply.
 *
 * Parameters (as used in the body):
 *   np       nfsnode the request is for; the mount is taken from its vnode.
 *   mrest    mbuf chain holding the procedure arguments.
 *   procnum  NFS procedure number.
 *   procp    process recorded in the request (r_procp).
 *   cred     credentials for the RPC header; must not be NULL (KASSERT).
 *   mrp, mdp, dposp
 *            out: reply chain, current mbuf and data position.  Written on
 *            success and, on NFSv3 mounts, when the server returned an NFS
 *            error (the return value then has NFSERR_RETERR or'ed in).
 *   rexmitp  optional out: zeroed on entry; after the RPC it is set to
 *            r_rexmit for datagram sockets, otherwise to whether
 *            R_REXMITTED is set in the request flags.
 *
 * Returns 0 on success, otherwise an error from the auth/send/reply path,
 * EOPNOTSUPP / EAUTH / EACCES for a denied RPC, EPROTONOSUPPORT for a
 * non-zero accept status, ENOMEM if the saved argument copy is missing on
 * a credential retry, or the NFS error sent by the server.
 */
930 1.23 christos int
931 1.102.2.3.2.1 jmc nfs_request(np, mrest, procnum, procp, cred, mrp, mdp, dposp, rexmitp)
932 1.83 drochner struct nfsnode *np;
933 1.14 mycroft struct mbuf *mrest;
934 1.1 cgd int procnum;
935 1.92 fvdl struct proc *procp;
936 1.14 mycroft struct ucred *cred;
937 1.1 cgd struct mbuf **mrp;
938 1.1 cgd struct mbuf **mdp;
939 1.1 cgd caddr_t *dposp;
940 1.102.2.3.2.1 jmc int *rexmitp;
941 1.1 cgd {
942 1.55 augustss struct mbuf *m, *mrep;
943 1.55 augustss struct nfsreq *rep;
944 1.55 augustss u_int32_t *tl;
945 1.55 augustss int i;
946 1.102.2.3 tron struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
947 1.14 mycroft struct mbuf *md, *mheadend;
948 1.24 fvdl char nickv[RPCX_NICKVERF];
949 1.14 mycroft time_t reqtime, waituntil;
950 1.14 mycroft caddr_t dpos, cp2;
951 1.62 fvdl int t1, s, error = 0, mrest_len, auth_len, auth_type;
952 1.97 yamt int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
953 1.24 fvdl int verf_len, verf_type;
954 1.22 cgd u_int32_t xid;
955 1.24 fvdl char *auth_str, *verf_str;
956 1.24 fvdl NFSKERBKEY_T key; /* save session key */
957 1.100 fvdl struct ucred acred;
958 1.62 fvdl #ifndef NFS_V2_ONLY
959 1.62 fvdl int nqlflag, cachable;
960 1.62 fvdl u_quad_t frev;
961 1.62 fvdl #endif
962 1.102.2.3 tron struct mbuf *mrest_backup = NULL;
963 1.102.2.3 tron struct ucred *origcred = NULL; /* XXX: gcc */
964 1.102.2.3 tron boolean_t retry_cred = TRUE;
965 1.102.2.3 tron boolean_t use_opencred = (np->n_flag & NUSEOPENCRED) != 0;
966 1.1 cgd
967 1.102.2.3.2.1 jmc if (rexmitp != NULL)
968 1.102.2.3.2.1 jmc *rexmitp = 0;
969 1.102.2.3.2.1 jmc
	/*
	 * Re-entered from the NFSERR_ACCES handling below with the other
	 * credential choice.  retry_cred is cleared before that jump, and the
	 * jump is only taken while retry_cred is still true afterwards.
	 */
970 1.102.2.3 tron tryagain_cred:
971 1.64 bjh21 KASSERT(cred != NULL);
972 1.1 cgd MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
973 1.1 cgd rep->r_nmp = nmp;
974 1.92 fvdl rep->r_procp = procp;
975 1.14 mycroft rep->r_procnum = procnum;
	/* Sum the m_len of every mbuf in the argument chain. */
976 1.14 mycroft i = 0;
977 1.14 mycroft m = mrest;
978 1.1 cgd while (m) {
979 1.14 mycroft i += m->m_len;
980 1.1 cgd m = m->m_next;
981 1.1 cgd }
982 1.14 mycroft mrest_len = i;
983 1.14 mycroft
984 1.14 mycroft /*
985 1.14 mycroft * Get the RPC header with authorization.
986 1.14 mycroft */
987 1.14 mycroft kerbauth:
988 1.24 fvdl verf_str = auth_str = (char *)0;
989 1.14 mycroft if (nmp->nm_flag & NFSMNT_KERB) {
990 1.24 fvdl verf_str = nickv;
991 1.24 fvdl verf_len = sizeof (nickv);
992 1.24 fvdl auth_type = RPCAUTH_KERB4;
993 1.46 perry memset((caddr_t)key, 0, sizeof (key));
	/*
	 * Try the nickname authenticator first; fall back to nfs_getauth()
	 * if that fails or if a previous attempt was rejected (failed_auth).
	 */
994 1.24 fvdl if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
995 1.24 fvdl &auth_len, verf_str, verf_len)) {
996 1.24 fvdl error = nfs_getauth(nmp, rep, cred, &auth_str,
997 1.24 fvdl &auth_len, verf_str, &verf_len, key);
998 1.14 mycroft if (error) {
999 1.14 mycroft free((caddr_t)rep, M_NFSREQ);
1000 1.14 mycroft m_freem(mrest);
1001 1.14 mycroft return (error);
1002 1.14 mycroft }
1003 1.1 cgd }
1004 1.102.2.3 tron retry_cred = FALSE;
1005 1.14 mycroft } else {
1006 1.102.2.3 tron /* AUTH_UNIX */
1007 1.102.2.3 tron uid_t uid;
1008 1.102.2.3 tron gid_t gid;
1009 1.102.2.3 tron
1010 1.102.2.3 tron /*
1011 1.102.2.3 tron * on the most unix filesystems, permission checks are
1012 1.102.2.3 tron * done when the file is open(2)'ed.
1013 1.102.2.3 tron * ie. once a file is successfully open'ed,
1014 1.102.2.3 tron * following i/o operations never fail with EACCES.
1015 1.102.2.3 tron * we try to follow the semantics as far as possible.
1016 1.102.2.3 tron *
1017 1.102.2.3 tron * note that we expect that the nfs server always grant
1018 1.102.2.3 tron * accesses by the file's owner.
1019 1.102.2.3 tron */
1020 1.102.2.3 tron origcred = cred;
	/*
	 * For READ/WRITE/COMMIT: if the caller is not already the file's
	 * owner and use_opencred is clear, substitute a temporary credential
	 * (acred) carrying the owner's uid/gid.  All other procedures use
	 * the caller's credential and are never retried.
	 */
1021 1.100 fvdl switch (procnum) {
1022 1.100 fvdl case NFSPROC_READ:
1023 1.100 fvdl case NFSPROC_WRITE:
1024 1.100 fvdl case NFSPROC_COMMIT:
1025 1.102.2.3 tron uid = np->n_vattr->va_uid;
1026 1.102.2.3 tron gid = np->n_vattr->va_gid;
1027 1.102.2.3 tron if (cred->cr_uid == uid && cred->cr_gid == gid) {
1028 1.102.2.3 tron retry_cred = FALSE;
1029 1.102.2.3 tron break;
1030 1.102.2.3 tron }
1031 1.102.2.3 tron if (use_opencred)
1032 1.102.2.3 tron break;
1033 1.102.2.3 tron acred.cr_uid = uid;
1034 1.102.2.3 tron acred.cr_gid = gid;
1035 1.100 fvdl acred.cr_ngroups = 0;
1036 1.100 fvdl acred.cr_ref = 2; /* Just to be safe.. */
1037 1.100 fvdl cred = &acred;
1038 1.100 fvdl break;
1039 1.102.2.3 tron default:
1040 1.102.2.3 tron retry_cred = FALSE;
1041 1.102.2.3 tron break;
1042 1.100 fvdl }
1043 1.102.2.3 tron /*
1044 1.102.2.3 tron * backup mbuf chain if we can need it later to retry.
1045 1.102.2.3 tron *
1046 1.102.2.3 tron * XXX maybe we can keep a direct reference to
1047 1.102.2.3 tron * mrest without doing m_copym, but it's ...ugly.
1048 1.102.2.3 tron */
1049 1.102.2.3 tron if (retry_cred)
1050 1.102.2.3 tron mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
1051 1.14 mycroft auth_type = RPCAUTH_UNIX;
	/* Group count is clamped to nm_numgrps; 4 bytes each plus 5 words. */
1052 1.20 mycroft auth_len = (((cred->cr_ngroups > nmp->nm_numgrps) ?
1053 1.20 mycroft nmp->nm_numgrps : cred->cr_ngroups) << 2) +
1054 1.14 mycroft 5 * NFSX_UNSIGNED;
1055 1.14 mycroft }
1056 1.24 fvdl m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
1057 1.24 fvdl auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
1058 1.14 mycroft if (auth_str)
1059 1.14 mycroft free(auth_str, M_TEMP);
1060 1.14 mycroft
1061 1.1 cgd /*
1062 1.14 mycroft * For stream protocols, insert a Sun RPC Record Mark.
1063 1.1 cgd */
1064 1.14 mycroft if (nmp->nm_sotype == SOCK_STREAM) {
1065 1.14 mycroft M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
1066 1.22 cgd *mtod(m, u_int32_t *) = htonl(0x80000000 |
1067 1.14 mycroft (m->m_pkthdr.len - NFSX_UNSIGNED));
1068 1.1 cgd }
1069 1.14 mycroft rep->r_mreq = m;
1070 1.14 mycroft rep->r_xid = xid;
	/*
	 * Re-entered after an NFSERR_TRYLATER reply: the same request
	 * structure and mbuf chain are reused, with per-attempt state reset.
	 */
1071 1.14 mycroft tryagain:
1072 1.14 mycroft if (nmp->nm_flag & NFSMNT_SOFT)
1073 1.14 mycroft rep->r_retry = nmp->nm_retry;
1074 1.14 mycroft else
1075 1.14 mycroft rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
1076 1.14 mycroft rep->r_rtt = rep->r_rexmit = 0;
1077 1.14 mycroft if (proct[procnum] > 0)
1078 1.14 mycroft rep->r_flags = R_TIMING;
1079 1.14 mycroft else
1080 1.14 mycroft rep->r_flags = 0;
1081 1.14 mycroft rep->r_mrep = NULL;
1082 1.1 cgd
1083 1.1 cgd /*
1084 1.1 cgd * Do the client side RPC.
1085 1.1 cgd */
1086 1.1 cgd nfsstats.rpcrequests++;
1087 1.1 cgd /*
1088 1.1 cgd * Chain request into list of outstanding requests. Be sure
1089 1.1 cgd * to put it LAST so timer finds oldest requests first.
1090 1.1 cgd */
1091 1.35 fvdl s = splsoftnet();
1092 1.16 mycroft TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
1093 1.14 mycroft
1094 1.14 mycroft /* Get send time for nqnfs */
1095 1.14 mycroft reqtime = time.tv_sec;
1096 1.14 mycroft
1097 1.1 cgd /*
1098 1.1 cgd * If backing off another request or avoiding congestion, don't
1099 1.1 cgd * send this one now but let timer do it. If not timing a request,
1100 1.1 cgd * do it now.
1101 1.1 cgd */
1102 1.14 mycroft if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
1103 1.14 mycroft (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1104 1.14 mycroft nmp->nm_sent < nmp->nm_cwnd)) {
1105 1.1 cgd splx(s);
1106 1.1 cgd if (nmp->nm_soflags & PR_CONNREQUIRED)
1107 1.39 fvdl error = nfs_sndlock(&nmp->nm_iflag, rep);
1108 1.14 mycroft if (!error) {
	/* Send a copy; r_mreq itself is kept in the request. */
1109 1.52 fvdl m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
1110 1.14 mycroft error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
1111 1.14 mycroft if (nmp->nm_soflags & PR_CONNREQUIRED)
1112 1.39 fvdl nfs_sndunlock(&nmp->nm_iflag);
1113 1.14 mycroft }
1114 1.14 mycroft if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
1115 1.14 mycroft nmp->nm_sent += NFS_CWNDSCALE;
1116 1.14 mycroft rep->r_flags |= R_SENT;
1117 1.14 mycroft }
1118 1.14 mycroft } else {
1119 1.1 cgd splx(s);
	/* Not sent now: r_rtt = -1 leaves the first transmit to nfs_timer(). */
1120 1.14 mycroft rep->r_rtt = -1;
1121 1.14 mycroft }
1122 1.1 cgd
1123 1.1 cgd /*
1124 1.1 cgd * Wait for the reply from our send or the timer's.
1125 1.1 cgd */
	/*
	 * EPIPE from the send does not abort the request; the reply wait
	 * is still entered.  NOTE(review): presumably the receive path
	 * reconnects and resends - confirm against nfs_receive().
	 */
1126 1.14 mycroft if (!error || error == EPIPE)
1127 1.14 mycroft error = nfs_reply(rep);
1128 1.1 cgd
1129 1.1 cgd /*
1130 1.1 cgd * RPC done, unlink the request.
1131 1.1 cgd */
1132 1.35 fvdl s = splsoftnet();
1133 1.16 mycroft TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1134 1.1 cgd splx(s);
1135 1.1 cgd
1136 1.1 cgd /*
1137 1.14 mycroft * Decrement the outstanding request count.
1138 1.14 mycroft */
1139 1.14 mycroft if (rep->r_flags & R_SENT) {
1140 1.14 mycroft rep->r_flags &= ~R_SENT; /* paranoia */
1141 1.14 mycroft nmp->nm_sent -= NFS_CWNDSCALE;
1142 1.14 mycroft }
1143 1.14 mycroft
	/* Report retransmission state to the caller, if it asked for it. */
1144 1.102.2.3.2.1 jmc if (rexmitp != NULL) {
1145 1.102.2.3.2.1 jmc int rexmit;
1146 1.102.2.3.2.1 jmc
1147 1.102.2.3.2.1 jmc if (nmp->nm_sotype != SOCK_DGRAM)
1148 1.102.2.3.2.1 jmc rexmit = (rep->r_flags & R_REXMITTED) != 0;
1149 1.102.2.3.2.1 jmc else
1150 1.102.2.3.2.1 jmc rexmit = rep->r_rexmit;
1151 1.102.2.3.2.1 jmc *rexmitp = rexmit;
1152 1.102.2.3.2.1 jmc }
1153 1.102.2.3.2.1 jmc
1154 1.14 mycroft /*
1155 1.1 cgd * If there was a successful reply and a tprintf msg.
1156 1.1 cgd * tprintf a response.
1157 1.1 cgd */
1158 1.1 cgd if (!error && (rep->r_flags & R_TPRINTFMSG))
1159 1.92 fvdl nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
1160 1.1 cgd "is alive again");
1161 1.1 cgd mrep = rep->r_mrep;
1162 1.14 mycroft md = rep->r_md;
1163 1.14 mycroft dpos = rep->r_dpos;
1164 1.102.2.3 tron if (error)
1165 1.102.2.3 tron goto nfsmout;
1166 1.1 cgd
1167 1.1 cgd /*
1168 1.1 cgd * break down the rpc header and check if ok
1169 1.1 cgd */
1170 1.24 fvdl nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1171 1.1 cgd if (*tl++ == rpc_msgdenied) {
1172 1.1 cgd if (*tl == rpc_mismatch)
1173 1.1 cgd error = EOPNOTSUPP;
1174 1.14 mycroft else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
1175 1.24 fvdl if (!failed_auth) {
	/*
	 * First Kerberos auth failure: rebuild the header once.
	 * NOTE(review): cutting mheadend->m_next presumably detaches the
	 * argument chain (mrest) so that freeing r_mreq releases only the
	 * old RPC header - confirm against nfsm_rpchead().
	 */
1176 1.14 mycroft failed_auth++;
1177 1.14 mycroft mheadend->m_next = (struct mbuf *)0;
1178 1.14 mycroft m_freem(mrep);
1179 1.14 mycroft m_freem(rep->r_mreq);
1180 1.14 mycroft goto kerbauth;
1181 1.14 mycroft } else
1182 1.14 mycroft error = EAUTH;
1183 1.14 mycroft } else
1184 1.1 cgd error = EACCES;
1185 1.1 cgd m_freem(mrep);
1186 1.102.2.3 tron goto nfsmout;
1187 1.1 cgd }
1188 1.14 mycroft
1189 1.1 cgd /*
1190 1.24 fvdl * Grab any Kerberos verifier, otherwise just throw it away.
1191 1.1 cgd */
1192 1.24 fvdl verf_type = fxdr_unsigned(int, *tl++);
1193 1.24 fvdl i = fxdr_unsigned(int32_t, *tl);
1194 1.24 fvdl if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1195 1.24 fvdl error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
1196 1.24 fvdl if (error)
1197 1.24 fvdl goto nfsmout;
1198 1.24 fvdl } else if (i > 0)
1199 1.24 fvdl nfsm_adv(nfsm_rndup(i));
1200 1.22 cgd nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1201 1.1 cgd /* 0 == ok */
1202 1.1 cgd if (*tl == 0) {
1203 1.22 cgd nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1204 1.1 cgd if (*tl != 0) {
1205 1.1 cgd error = fxdr_unsigned(int, *tl);
	/*
	 * Access denied with a retry still available: throw away this
	 * request, flip the credential choice and rebuild the request from
	 * the saved copy of the arguments.
	 */
1206 1.102.2.3 tron if (error == NFSERR_ACCES && retry_cred) {
1207 1.102.2.3 tron m_freem(mrep);
1208 1.102.2.3 tron m_freem(rep->r_mreq);
1209 1.102.2.3 tron FREE(rep, M_NFSREQ);
1210 1.102.2.3 tron use_opencred = !use_opencred;
1211 1.102.2.3 tron if (mrest_backup == NULL)
1212 1.102.2.3 tron return ENOMEM; /* m_copym failure */
1213 1.102.2.3 tron mrest = mrest_backup;
1214 1.102.2.3 tron mrest_backup = NULL;
1215 1.102.2.3 tron cred = origcred;
1216 1.102.2.3 tron error = 0;
1217 1.102.2.3 tron retry_cred = FALSE;
1218 1.102.2.3 tron goto tryagain_cred;
1219 1.102.2.3 tron }
1220 1.24 fvdl if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1221 1.24 fvdl error == NFSERR_TRYLATER) {
1222 1.24 fvdl m_freem(mrep);
1223 1.14 mycroft error = 0;
	/* Sleep until the delay has elapsed, then grow it up to the cap. */
1224 1.14 mycroft waituntil = time.tv_sec + trylater_delay;
1225 1.14 mycroft while (time.tv_sec < waituntil)
1226 1.14 mycroft (void) tsleep((caddr_t)&lbolt,
1227 1.14 mycroft PSOCK, "nqnfstry", 0);
1228 1.97 yamt trylater_delay *= NFS_TRYLATERDELMUL;
1229 1.97 yamt if (trylater_delay > NFS_TRYLATERDELMAX)
1230 1.97 yamt trylater_delay = NFS_TRYLATERDELMAX;
1231 1.95 yamt /*
1232 1.95 yamt * RFC1813:
1233 1.95 yamt * The client should wait and then try
1234 1.95 yamt * the request with a new RPC transaction ID.
1235 1.95 yamt */
1236 1.95 yamt nfs_renewxid(rep);
1237 1.14 mycroft goto tryagain;
1238 1.14 mycroft }
1239 1.14 mycroft
1240 1.14 mycroft /*
1241 1.14 mycroft * If the File Handle was stale, invalidate the
1242 1.14 mycroft * lookup cache, just in case.
1243 1.14 mycroft */
1244 1.14 mycroft if (error == ESTALE)
1245 1.83 drochner cache_purge(NFSTOV(np));
	/*
	 * NFSv3: the reply is handed back to the caller together with the
	 * error, flagged by NFSERR_RETERR.  Otherwise the reply is dropped.
	 */
1246 1.24 fvdl if (nmp->nm_flag & NFSMNT_NFSV3) {
1247 1.24 fvdl *mrp = mrep;
1248 1.24 fvdl *mdp = md;
1249 1.24 fvdl *dposp = dpos;
1250 1.24 fvdl error |= NFSERR_RETERR;
1251 1.24 fvdl } else
1252 1.24 fvdl m_freem(mrep);
1253 1.102.2.3 tron goto nfsmout;
1254 1.1 cgd }
1255 1.14 mycroft
1256 1.102.2.3 tron /*
1257 1.102.2.3 tron * note which credential worked to minimize number of retries.
1258 1.102.2.3 tron */
1259 1.102.2.3 tron if (use_opencred)
1260 1.102.2.3 tron np->n_flag |= NUSEOPENCRED;
1261 1.102.2.3 tron else
1262 1.102.2.3 tron np->n_flag &= ~NUSEOPENCRED;
1263 1.102.2.3 tron
1264 1.61 bjh21 #ifndef NFS_V2_ONLY
1265 1.14 mycroft /*
1266 1.14 mycroft * For nqnfs, get any lease in reply
1267 1.14 mycroft */
1268 1.14 mycroft if (nmp->nm_flag & NFSMNT_NQNFS) {
1269 1.22 cgd nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1270 1.14 mycroft if (*tl) {
1271 1.14 mycroft nqlflag = fxdr_unsigned(int, *tl);
1272 1.22 cgd nfsm_dissect(tl, u_int32_t *, 4*NFSX_UNSIGNED);
1273 1.14 mycroft cachable = fxdr_unsigned(int, *tl++);
1274 1.14 mycroft reqtime += fxdr_unsigned(int, *tl++);
1275 1.14 mycroft if (reqtime > time.tv_sec) {
1276 1.50 fair frev = fxdr_hyper(tl);
1277 1.14 mycroft nqnfs_clientlease(nmp, np, nqlflag,
1278 1.14 mycroft cachable, reqtime, frev);
1279 1.14 mycroft }
1280 1.14 mycroft }
1281 1.14 mycroft }
1282 1.61 bjh21 #endif
1283 1.1 cgd *mrp = mrep;
1284 1.1 cgd *mdp = md;
1285 1.1 cgd *dposp = dpos;
1286 1.102.2.3 tron
1287 1.102.2.3 tron KASSERT(error == 0);
1288 1.102.2.3 tron goto nfsmout;
1289 1.1 cgd }
	/* Accept status was non-zero. */
1290 1.1 cgd m_freem(mrep);
1291 1.24 fvdl error = EPROTONOSUPPORT;
	/* Common exit: release the request, its mbuf chain and any unused backup. */
1292 1.24 fvdl nfsmout:
1293 1.14 mycroft m_freem(rep->r_mreq);
1294 1.14 mycroft free((caddr_t)rep, M_NFSREQ);
1295 1.102.2.3 tron m_freem(mrest_backup);
1296 1.1 cgd return (error);
1297 1.1 cgd }
1298 1.32 thorpej #endif /* NFS */
1299 1.1 cgd
1300 1.1 cgd /*
1301 1.1 cgd * Generate the rpc reply header
1302 1.1 cgd * siz arg. is used to decide if adding a cluster is worthwhile
1303 1.1 cgd */
/*
 * nfs_rephead: build the RPC reply header in a newly allocated mbuf.
 *
 *   siz     size hint for the reply body; RPC_REPLYSIZ is added and a
 *           cluster is attached when the total reaches max_datalen.
 *   nd      request descriptor; nd_retxid, nd_flag, nd_cr, nd_nam2 and
 *           nd_duration are read.
 *   slp     server socket, used for the NUIDHASH lookup in the Kerberos case.
 *   err     status to encode (0, an RPC-level error, or an NFS error).
 *   cache, frev
 *           only used when an nqnfs lease is piggybacked on the reply.
 *   mrq     optional out: head of the reply chain (may be NULL).
 *   mbp, bposp
 *           out: current mbuf and build position for the caller to continue.
 *
 * Always returns 0.  nfsstats.srvrpc_errs is incremented when err is
 * neither 0 nor NFSERR_RETVOID.
 */
1304 1.23 christos int
1305 1.24 fvdl nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
1306 1.1 cgd int siz;
1307 1.24 fvdl struct nfsrv_descript *nd;
1308 1.24 fvdl struct nfssvc_sock *slp;
1309 1.1 cgd int err;
1310 1.14 mycroft int cache;
1311 1.14 mycroft u_quad_t *frev;
1312 1.1 cgd struct mbuf **mrq;
1313 1.1 cgd struct mbuf **mbp;
1314 1.1 cgd caddr_t *bposp;
1315 1.1 cgd {
1316 1.55 augustss u_int32_t *tl;
1317 1.55 augustss struct mbuf *mreq;
1318 1.1 cgd caddr_t bpos;
1319 1.79 matt struct mbuf *mb;
1320 1.1 cgd
1321 1.79 matt mreq = m_gethdr(M_WAIT, MT_DATA);
1322 1.79 matt MCLAIM(mreq, &nfs_mowner);
1323 1.1 cgd mb = mreq;
1324 1.14 mycroft /*
1325 1.14 mycroft * If this is a big reply, use a cluster else
1326 1.14 mycroft * try and leave leading space for the lower level headers.
1327 1.14 mycroft */
1328 1.14 mycroft siz += RPC_REPLYSIZ;
1329 1.45 fvdl if (siz >= max_datalen) {
1330 1.79 matt m_clget(mreq, M_WAIT);
1331 1.14 mycroft } else
1332 1.14 mycroft mreq->m_data += max_hdr;
	/*
	 * Six words are reserved up front; the auth-error case below gives
	 * one of them back.
	 */
1333 1.22 cgd tl = mtod(mreq, u_int32_t *);
1334 1.24 fvdl mreq->m_len = 6 * NFSX_UNSIGNED;
1335 1.24 fvdl bpos = ((caddr_t)tl) + mreq->m_len;
1336 1.14 mycroft *tl++ = txdr_unsigned(nd->nd_retxid);
1337 1.1 cgd *tl++ = rpc_reply;
1338 1.24 fvdl if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1339 1.1 cgd *tl++ = rpc_msgdenied;
1340 1.24 fvdl if (err & NFSERR_AUTHERR) {
1341 1.14 mycroft *tl++ = rpc_autherr;
1342 1.24 fvdl *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1343 1.14 mycroft mreq->m_len -= NFSX_UNSIGNED;
1344 1.14 mycroft bpos -= NFSX_UNSIGNED;
1345 1.14 mycroft } else {
1346 1.14 mycroft *tl++ = rpc_mismatch;
1347 1.24 fvdl *tl++ = txdr_unsigned(RPC_VER2);
1348 1.24 fvdl *tl = txdr_unsigned(RPC_VER2);
1349 1.14 mycroft }
1350 1.1 cgd } else {
1351 1.1 cgd *tl++ = rpc_msgaccepted;
1352 1.24 fvdl
1353 1.24 fvdl /*
1354 1.24 fvdl * For Kerberos authentication, we must send the nickname
1355 1.24 fvdl * verifier back, otherwise just RPCAUTH_NULL.
1356 1.24 fvdl */
1357 1.24 fvdl if (nd->nd_flag & ND_KERBFULL) {
1358 1.84 yamt struct nfsuid *nuidp;
1359 1.84 yamt struct timeval ktvin, ktvout;
1360 1.24 fvdl
	/* Look up the cached uid entry, matching the client address if known. */
1361 1.84 yamt LIST_FOREACH(nuidp, NUIDHASH(slp, nd->nd_cr.cr_uid),
1362 1.84 yamt nu_hash) {
1363 1.84 yamt if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1364 1.84 yamt (!nd->nd_nam2 || netaddr_match(
1365 1.84 yamt NU_NETFAM(nuidp), &nuidp->nu_haddr,
1366 1.84 yamt nd->nd_nam2)))
1367 1.84 yamt break;
1368 1.84 yamt }
1369 1.84 yamt if (nuidp) {
1370 1.84 yamt ktvin.tv_sec =
1371 1.84 yamt txdr_unsigned(nuidp->nu_timestamp.tv_sec
1372 1.84 yamt - 1);
1373 1.84 yamt ktvin.tv_usec =
1374 1.84 yamt txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1375 1.24 fvdl
1376 1.84 yamt /*
1377 1.84 yamt * Encrypt the timestamp in ecb mode using the
1378 1.84 yamt * session key.
1379 1.84 yamt */
1380 1.24 fvdl #ifdef NFSKERB
1381 1.84 yamt XXX
1382 1.24 fvdl #endif
1383 1.24 fvdl
	/*
	 * NOTE(review): ktvout is read below but nothing visible in this
	 * function assigns it; the encryption step that would produce it is
	 * only the XXX placeholder above.  Confirm before enabling this path.
	 */
1384 1.84 yamt *tl++ = rpc_auth_kerb;
1385 1.84 yamt *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1386 1.84 yamt *tl = ktvout.tv_sec;
1387 1.84 yamt nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1388 1.84 yamt *tl++ = ktvout.tv_usec;
1389 1.84 yamt *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1390 1.84 yamt } else {
1391 1.84 yamt *tl++ = 0;
1392 1.84 yamt *tl++ = 0;
1393 1.84 yamt }
1394 1.24 fvdl } else {
1395 1.24 fvdl *tl++ = 0;
1396 1.24 fvdl *tl++ = 0;
1397 1.24 fvdl }
	/* Map the error onto the RPC accept status word. */
1398 1.1 cgd switch (err) {
1399 1.1 cgd case EPROGUNAVAIL:
1400 1.1 cgd *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1401 1.1 cgd break;
1402 1.1 cgd case EPROGMISMATCH:
1403 1.1 cgd *tl = txdr_unsigned(RPC_PROGMISMATCH);
	/* Two extra words follow: 3 and 3 for nqnfs, otherwise 2 and 3. */
1404 1.24 fvdl nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1405 1.24 fvdl if (nd->nd_flag & ND_NQNFS) {
1406 1.24 fvdl *tl++ = txdr_unsigned(3);
1407 1.24 fvdl *tl = txdr_unsigned(3);
1408 1.24 fvdl } else {
1409 1.24 fvdl *tl++ = txdr_unsigned(2);
1410 1.24 fvdl *tl = txdr_unsigned(3);
1411 1.24 fvdl }
1412 1.1 cgd break;
1413 1.1 cgd case EPROCUNAVAIL:
1414 1.1 cgd *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1415 1.1 cgd break;
1416 1.24 fvdl case EBADRPC:
1417 1.24 fvdl *tl = txdr_unsigned(RPC_GARBAGE);
1418 1.24 fvdl break;
1419 1.1 cgd default:
	/*
	 * RPC accepted: status 0, followed by the NFS status word unless the
	 * procedure returns nothing (NFSERR_RETVOID).
	 */
1420 1.1 cgd *tl = 0;
1421 1.24 fvdl if (err != NFSERR_RETVOID) {
1422 1.22 cgd nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1423 1.14 mycroft if (err)
1424 1.24 fvdl *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1425 1.14 mycroft else
1426 1.24 fvdl *tl = 0;
1427 1.1 cgd }
1428 1.1 cgd break;
1429 1.1 cgd };
1430 1.1 cgd }
1431 1.14 mycroft
1432 1.14 mycroft /*
1433 1.14 mycroft * For nqnfs, piggyback lease as requested.
1434 1.14 mycroft */
1435 1.24 fvdl if ((nd->nd_flag & ND_NQNFS) && err == 0) {
1436 1.24 fvdl if (nd->nd_flag & ND_LEASE) {
1437 1.24 fvdl nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1438 1.24 fvdl *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
1439 1.14 mycroft *tl++ = txdr_unsigned(cache);
1440 1.14 mycroft *tl++ = txdr_unsigned(nd->nd_duration);
1441 1.50 fair txdr_hyper(*frev, tl);
1442 1.14 mycroft } else {
1443 1.22 cgd nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1444 1.14 mycroft *tl = 0;
1445 1.14 mycroft }
1446 1.14 mycroft }
1447 1.34 fvdl if (mrq != NULL)
1448 1.34 fvdl *mrq = mreq;
1449 1.1 cgd *mbp = mb;
1450 1.1 cgd *bposp = bpos;
1451 1.24 fvdl if (err != 0 && err != NFSERR_RETVOID)
1452 1.1 cgd nfsstats.srvrpc_errs++;
1453 1.1 cgd return (0);
1454 1.1 cgd }
1455 1.1 cgd
1456 1.1 cgd /*
1457 1.1 cgd * Nfs timer routine
1458 1.1 cgd * Scan the nfsreq list and retransmit any requests that have timed out
1459 1.1 cgd * To avoid retransmission attempts on STREAM sockets (in the future) make
1460 1.1 cgd * sure to set the r_retry field to 0 (implies nm_retry == 0).
1461 1.1 cgd */
/*
 * nfs_timer: periodic callout; reschedules itself every nfs_ticks.
 *
 * At splsoftnet it walks nfs_reqq.  Requests that already have a reply or
 * are marked R_SOFTTERM are skipped.  For the rest it
 *  - marks the request R_SOFTTERM if nfs_sigintr() reports a pending signal,
 *  - advances the per-request tick counter (r_rtt) and checks it against
 *    the timeout scaled by nfs_backoff[],
 *  - prints "not responding" once r_rexmit exceeds nm_deadthresh,
 *  - marks it R_SOFTTERM once r_rexmit reaches r_retry,
 *  - and, on datagram sockets only, resends when socket buffer space and
 *    the congestion window allow.
 * With NFSSERVER it also calls nqnfs_serverd() once per second and wakes an
 * nfsd for sockets whose first queued write is due.
 *
 * arg is unused.
 */
1462 1.7 mycroft void
1463 1.14 mycroft nfs_timer(arg)
1464 1.24 fvdl void *arg; /* never used */
1465 1.1 cgd {
1466 1.55 augustss struct nfsreq *rep;
1467 1.55 augustss struct mbuf *m;
1468 1.55 augustss struct socket *so;
1469 1.55 augustss struct nfsmount *nmp;
1470 1.55 augustss int timeo;
1471 1.27 thorpej int s, error;
1472 1.27 thorpej #ifdef NFSSERVER
1473 1.55 augustss struct nfssvc_sock *slp;
1474 1.14 mycroft static long lasttime = 0;
1475 1.27 thorpej u_quad_t cur_usec;
1476 1.23 christos #endif
1477 1.1 cgd
1478 1.21 mycroft s = splsoftnet();
1479 1.73 christos TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
1480 1.1 cgd nmp = rep->r_nmp;
1481 1.14 mycroft if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
1482 1.1 cgd continue;
1483 1.92 fvdl if (nfs_sigintr(nmp, rep, rep->r_procp)) {
1484 1.1 cgd rep->r_flags |= R_SOFTTERM;
1485 1.1 cgd continue;
1486 1.1 cgd }
	/*
	 * r_rtt < 0 means the request is waiting for its (re)transmit:
	 * the timeout check is skipped and it falls through to the send
	 * logic below.
	 */
1487 1.14 mycroft if (rep->r_rtt >= 0) {
1488 1.14 mycroft rep->r_rtt++;
1489 1.14 mycroft if (nmp->nm_flag & NFSMNT_DUMBTIMR)
1490 1.14 mycroft timeo = nmp->nm_timeo;
1491 1.14 mycroft else
1492 1.14 mycroft timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
1493 1.14 mycroft if (nmp->nm_timeouts > 0)
1494 1.14 mycroft timeo *= nfs_backoff[nmp->nm_timeouts - 1];
1495 1.14 mycroft if (rep->r_rtt <= timeo)
1496 1.14 mycroft continue;
	/* nm_timeouts saturates at the number of nfs_backoff[] entries. */
1497 1.98 yamt if (nmp->nm_timeouts <
1498 1.98 yamt (sizeof(nfs_backoff) / sizeof(nfs_backoff[0])))
1499 1.14 mycroft nmp->nm_timeouts++;
1500 1.1 cgd }
1501 1.1 cgd /*
1502 1.1 cgd * Check for server not responding
1503 1.1 cgd */
1504 1.1 cgd if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1505 1.14 mycroft rep->r_rexmit > nmp->nm_deadthresh) {
1506 1.92 fvdl nfs_msg(rep->r_procp,
1507 1.1 cgd nmp->nm_mountp->mnt_stat.f_mntfromname,
1508 1.1 cgd "not responding");
1509 1.1 cgd rep->r_flags |= R_TPRINTFMSG;
1510 1.1 cgd }
1511 1.1 cgd if (rep->r_rexmit >= rep->r_retry) { /* too many */
1512 1.1 cgd nfsstats.rpctimeouts++;
1513 1.1 cgd rep->r_flags |= R_SOFTTERM;
1514 1.1 cgd continue;
1515 1.1 cgd }
	/* Non-datagram sockets: only count the timeout, never resend here. */
1516 1.14 mycroft if (nmp->nm_sotype != SOCK_DGRAM) {
1517 1.14 mycroft if (++rep->r_rexmit > NFS_MAXREXMIT)
1518 1.14 mycroft rep->r_rexmit = NFS_MAXREXMIT;
1519 1.14 mycroft continue;
1520 1.14 mycroft }
1521 1.14 mycroft if ((so = nmp->nm_so) == NULL)
1522 1.1 cgd continue;
1523 1.1 cgd
1524 1.1 cgd /*
1525 1.1 cgd * If there is enough space and the window allows..
1526 1.1 cgd * Resend it
1527 1.14 mycroft * Set r_rtt to -1 in case we fail to send it now.
1528 1.1 cgd */
1529 1.14 mycroft rep->r_rtt = -1;
	/* The copy uses M_DONTWAIT; if it fails the resend is simply skipped. */
1530 1.1 cgd if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1531 1.14 mycroft ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1532 1.14 mycroft (rep->r_flags & R_SENT) ||
1533 1.14 mycroft nmp->nm_sent < nmp->nm_cwnd) &&
1534 1.14 mycroft (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
	/* Connected sockets are sent to without an explicit address. */
1535 1.40 fvdl if (so->so_state & SS_ISCONNECTED)
1536 1.1 cgd error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
1537 1.92 fvdl (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
1538 1.1 cgd else
1539 1.1 cgd error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
1540 1.92 fvdl nmp->nm_nam, (struct mbuf *)0, (struct proc *)0);
1541 1.1 cgd if (error) {
1542 1.33 fvdl if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
1543 1.37 fvdl #ifdef DEBUG
1544 1.33 fvdl printf("nfs_timer: ignoring error %d\n",
1545 1.33 fvdl error);
1546 1.37 fvdl #endif
1547 1.1 cgd so->so_error = 0;
1548 1.33 fvdl }
1549 1.1 cgd } else {
1550 1.1 cgd /*
1551 1.14 mycroft * Iff first send, start timing
1552 1.14 mycroft * else turn timing off, backoff timer
1553 1.14 mycroft * and divide congestion window by 2.
1554 1.1 cgd */
1555 1.14 mycroft if (rep->r_flags & R_SENT) {
1556 1.14 mycroft rep->r_flags &= ~R_TIMING;
1557 1.14 mycroft if (++rep->r_rexmit > NFS_MAXREXMIT)
1558 1.14 mycroft rep->r_rexmit = NFS_MAXREXMIT;
1559 1.14 mycroft nmp->nm_cwnd >>= 1;
1560 1.14 mycroft if (nmp->nm_cwnd < NFS_CWNDSCALE)
1561 1.14 mycroft nmp->nm_cwnd = NFS_CWNDSCALE;
1562 1.14 mycroft nfsstats.rpcretries++;
1563 1.14 mycroft } else {
1564 1.14 mycroft rep->r_flags |= R_SENT;
1565 1.14 mycroft nmp->nm_sent += NFS_CWNDSCALE;
1566 1.14 mycroft }
1567 1.14 mycroft rep->r_rtt = 0;
1568 1.1 cgd }
1569 1.1 cgd }
1570 1.1 cgd }
1571 1.14 mycroft
1572 1.14 mycroft #ifdef NFSSERVER
1573 1.14 mycroft /*
1574 1.14 mycroft * Call the nqnfs server timer once a second to handle leases.
1575 1.14 mycroft */
1576 1.14 mycroft if (lasttime != time.tv_sec) {
1577 1.14 mycroft lasttime = time.tv_sec;
1578 1.14 mycroft nqnfs_serverd();
1579 1.14 mycroft }
1580 1.24 fvdl
1581 1.24 fvdl /*
1582 1.24 fvdl * Scan the write gathering queues for writes that need to be
1583 1.24 fvdl * completed now.
1584 1.24 fvdl */
1585 1.24 fvdl cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
1586 1.73 christos TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
1587 1.80 yamt if (LIST_FIRST(&slp->ns_tq) &&
1588 1.80 yamt LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
1589 1.24 fvdl nfsrv_wakenfsd(slp);
1590 1.24 fvdl }
1591 1.14 mycroft #endif /* NFSSERVER */
1592 1.1 cgd splx(s);
1593 1.99 yamt callout_schedule(&nfs_timer_ch, nfs_ticks);
1594 1.1 cgd }
1595 1.1 cgd
1596 1.73 christos /*ARGSUSED*/
1597 1.73 christos void
1598 1.92 fvdl nfs_exit(p, v)
1599 1.92 fvdl struct proc *p;
1600 1.73 christos void *v;
1601 1.73 christos {
1602 1.73 christos struct nfsreq *rp;
1603 1.73 christos int s = splsoftnet();
1604 1.73 christos
1605 1.73 christos TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
1606 1.92 fvdl if (rp->r_procp == p)
1607 1.73 christos TAILQ_REMOVE(&nfs_reqq, rp, r_chain);
1608 1.73 christos }
1609 1.73 christos splx(s);
1610 1.73 christos }
1611 1.73 christos
1612 1.1 cgd /*
1613 1.14 mycroft * Test for a termination condition pending on the process.
1614 1.14 mycroft * This is used for NFSMNT_INT mounts.
1615 1.1 cgd */
1616 1.23 christos int
1617 1.92 fvdl nfs_sigintr(nmp, rep, p)
1618 1.14 mycroft struct nfsmount *nmp;
1619 1.14 mycroft struct nfsreq *rep;
1620 1.92 fvdl struct proc *p;
1621 1.14 mycroft {
1622 1.47 mycroft sigset_t ss;
1623 1.14 mycroft
1624 1.14 mycroft if (rep && (rep->r_flags & R_SOFTTERM))
1625 1.14 mycroft return (EINTR);
1626 1.14 mycroft if (!(nmp->nm_flag & NFSMNT_INT))
1627 1.14 mycroft return (0);
1628 1.92 fvdl if (p) {
1629 1.92 fvdl sigpending1(p, &ss);
1630 1.47 mycroft #if 0
1631 1.92 fvdl sigminusset(&p->p_sigctx.ps_sigignore, &ss);
1632 1.47 mycroft #endif
1633 1.47 mycroft if (sigismember(&ss, SIGINT) || sigismember(&ss, SIGTERM) ||
1634 1.47 mycroft sigismember(&ss, SIGKILL) || sigismember(&ss, SIGHUP) ||
1635 1.47 mycroft sigismember(&ss, SIGQUIT))
1636 1.47 mycroft return (EINTR);
1637 1.47 mycroft }
1638 1.14 mycroft return (0);
1639 1.14 mycroft }
1640 1.1 cgd
1641 1.1 cgd /*
1642 1.14 mycroft * Lock a socket against others.
1643 1.14 mycroft * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1644 1.14 mycroft * and also to avoid race conditions between the processes with nfs requests
1645 1.14 mycroft * in progress when a reconnect is necessary.
1646 1.1 cgd */
1647 1.23 christos int
1648 1.14 mycroft nfs_sndlock(flagp, rep)
1649 1.55 augustss int *flagp;
1650 1.14 mycroft struct nfsreq *rep;
1651 1.14 mycroft {
1652 1.92 fvdl struct proc *p;
1653 1.14 mycroft int slpflag = 0, slptimeo = 0;
1654 1.14 mycroft
1655 1.14 mycroft if (rep) {
1656 1.92 fvdl p = rep->r_procp;
1657 1.14 mycroft if (rep->r_nmp->nm_flag & NFSMNT_INT)
1658 1.14 mycroft slpflag = PCATCH;
1659 1.14 mycroft } else
1660 1.92 fvdl p = (struct proc *)0;
1661 1.14 mycroft while (*flagp & NFSMNT_SNDLOCK) {
1662 1.102 yamt if (rep && nfs_sigintr(rep->r_nmp, rep, p))
1663 1.14 mycroft return (EINTR);
1664 1.14 mycroft *flagp |= NFSMNT_WANTSND;
1665 1.14 mycroft (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
1666 1.14 mycroft slptimeo);
1667 1.14 mycroft if (slpflag == PCATCH) {
1668 1.14 mycroft slpflag = 0;
1669 1.14 mycroft slptimeo = 2 * hz;
1670 1.14 mycroft }
1671 1.14 mycroft }
1672 1.14 mycroft *flagp |= NFSMNT_SNDLOCK;
1673 1.14 mycroft return (0);
1674 1.14 mycroft }
1675 1.1 cgd
1676 1.14 mycroft /*
1677 1.14 mycroft * Unlock the stream socket for others.
1678 1.14 mycroft */
1679 1.14 mycroft void
1680 1.14 mycroft nfs_sndunlock(flagp)
1681 1.55 augustss int *flagp;
1682 1.1 cgd {
1683 1.1 cgd
1684 1.14 mycroft if ((*flagp & NFSMNT_SNDLOCK) == 0)
1685 1.14 mycroft panic("nfs sndunlock");
1686 1.14 mycroft *flagp &= ~NFSMNT_SNDLOCK;
1687 1.14 mycroft if (*flagp & NFSMNT_WANTSND) {
1688 1.14 mycroft *flagp &= ~NFSMNT_WANTSND;
1689 1.14 mycroft wakeup((caddr_t)flagp);
1690 1.1 cgd }
1691 1.14 mycroft }
1692 1.14 mycroft
1693 1.23 christos int
1694 1.14 mycroft nfs_rcvlock(rep)
1695 1.55 augustss struct nfsreq *rep;
1696 1.14 mycroft {
1697 1.51 sommerfe struct nfsmount *nmp = rep->r_nmp;
1698 1.55 augustss int *flagp = &nmp->nm_iflag;
1699 1.14 mycroft int slpflag, slptimeo = 0;
1700 1.87 yamt int error = 0;
1701 1.14 mycroft
1702 1.51 sommerfe if (*flagp & NFSMNT_DISMNT)
1703 1.51 sommerfe return EIO;
1704 1.51 sommerfe
1705 1.14 mycroft if (*flagp & NFSMNT_INT)
1706 1.14 mycroft slpflag = PCATCH;
1707 1.14 mycroft else
1708 1.14 mycroft slpflag = 0;
1709 1.87 yamt simple_lock(&nmp->nm_slock);
1710 1.14 mycroft while (*flagp & NFSMNT_RCVLOCK) {
1711 1.92 fvdl if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) {
1712 1.87 yamt error = EINTR;
1713 1.87 yamt goto quit;
1714 1.87 yamt }
1715 1.14 mycroft *flagp |= NFSMNT_WANTRCV;
1716 1.51 sommerfe nmp->nm_waiters++;
1717 1.87 yamt (void) ltsleep(flagp, slpflag | (PZERO - 1), "nfsrcvlk",
1718 1.87 yamt slptimeo, &nmp->nm_slock);
1719 1.51 sommerfe nmp->nm_waiters--;
1720 1.51 sommerfe if (*flagp & NFSMNT_DISMNT) {
1721 1.51 sommerfe wakeup(&nmp->nm_waiters);
1722 1.87 yamt error = EIO;
1723 1.87 yamt goto quit;
1724 1.51 sommerfe }
1725 1.36 fvdl /* If our reply was received while we were sleeping,
1726 1.36 fvdl * then just return without taking the lock to avoid a
1727 1.36 fvdl * situation where a single iod could 'capture' the
1728 1.36 fvdl * receive lock.
1729 1.36 fvdl */
1730 1.87 yamt if (rep->r_mrep != NULL) {
1731 1.87 yamt error = EALREADY;
1732 1.87 yamt goto quit;
1733 1.87 yamt }
1734 1.14 mycroft if (slpflag == PCATCH) {
1735 1.14 mycroft slpflag = 0;
1736 1.14 mycroft slptimeo = 2 * hz;
1737 1.1 cgd }
1738 1.1 cgd }
1739 1.14 mycroft *flagp |= NFSMNT_RCVLOCK;
1740 1.87 yamt quit:
1741 1.87 yamt simple_unlock(&nmp->nm_slock);
1742 1.87 yamt return error;
1743 1.14 mycroft }
1744 1.14 mycroft
1745 1.14 mycroft /*
1746 1.14 mycroft * Unlock the stream socket for others.
1747 1.14 mycroft */
1748 1.14 mycroft void
1749 1.87 yamt nfs_rcvunlock(nmp)
1750 1.87 yamt struct nfsmount *nmp;
1751 1.14 mycroft {
1752 1.87 yamt int *flagp = &nmp->nm_iflag;
1753 1.14 mycroft
1754 1.87 yamt simple_lock(&nmp->nm_slock);
1755 1.14 mycroft if ((*flagp & NFSMNT_RCVLOCK) == 0)
1756 1.14 mycroft panic("nfs rcvunlock");
1757 1.14 mycroft *flagp &= ~NFSMNT_RCVLOCK;
1758 1.14 mycroft if (*flagp & NFSMNT_WANTRCV) {
1759 1.14 mycroft *flagp &= ~NFSMNT_WANTRCV;
1760 1.14 mycroft wakeup((caddr_t)flagp);
1761 1.14 mycroft }
1762 1.87 yamt simple_unlock(&nmp->nm_slock);
1763 1.1 cgd }
1764 1.1 cgd
1765 1.14 mycroft /*
1766 1.14 mycroft * Parse an RPC request
1767 1.14 mycroft * - verify it
1768 1.14 mycroft * - fill in the cred struct.
1769 1.1 cgd */
/*
 * nfs_getreq: decode the RPC call header held in nd->nd_mrep, starting at
 * nd->nd_md / nd->nd_dpos, and fill in nd->nd_flag, nd->nd_procnum,
 * nd->nd_repstat, nd->nd_cr and nd->nd_duration.
 *
 * has_header: when non-zero, the xid and message-type words still precede
 * the RPC version word; the xid is saved in nd->nd_retxid and a message
 * type other than rpc_call yields EBADRPC.
 *
 * Return value:
 *   0       - header accepted, or rejected at the RPC/auth level.  A
 *             rejection is signalled by a non-zero nd->nd_repstat together
 *             with nd->nd_procnum == NFSPROC_NOOP.  A NFSPROC_NULL call
 *             also returns 0 right after the procedure number is read.
 *   EBADRPC - wrong message type or an out-of-range length field; mrep
 *             has been released with m_freem().
 *   other   - whatever `error' holds at the nfsmout label (presumably set
 *             by the nfsm_* macros -- confirm against their definitions).
 *
 * nd->nd_md and nd->nd_dpos are written back only on the final success
 * path; the early "return (0)" paths leave them untouched.
 */
1770 1.23 christos int
1771 1.24 fvdl nfs_getreq(nd, nfsd, has_header)
1772 1.55 augustss struct nfsrv_descript *nd;
1773 1.24 fvdl struct nfsd *nfsd;
1774 1.14 mycroft int has_header;
1775 1.1 cgd {
1776 1.55 augustss int len, i;
1777 1.55 augustss u_int32_t *tl;
1778 1.55 augustss int32_t t1;
1779 1.14 mycroft struct uio uio;
1780 1.14 mycroft struct iovec iov;
1781 1.24 fvdl caddr_t dpos, cp2, cp;
1782 1.22 cgd u_int32_t nfsvers, auth_type;
1783 1.24 fvdl uid_t nickuid;
1784 1.24 fvdl int error = 0, nqnfs = 0, ticklen;
1785 1.14 mycroft struct mbuf *mrep, *md;
1786 1.55 augustss struct nfsuid *nuidp;
1787 1.24 fvdl struct timeval tvin, tvout;
1788 1.14 mycroft
1789 1.14 mycroft mrep = nd->nd_mrep;
1790 1.14 mycroft md = nd->nd_md;
1791 1.14 mycroft dpos = nd->nd_dpos;
1792 1.14 mycroft if (has_header) {
1793 1.24 fvdl nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
1794 1.24 fvdl nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
1795 1.14 mycroft if (*tl++ != rpc_call) {
1796 1.14 mycroft m_freem(mrep);
1797 1.14 mycroft return (EBADRPC);
1798 1.14 mycroft }
1799 1.24 fvdl } else
1800 1.24 fvdl nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
1801 1.14 mycroft nd->nd_repstat = 0;
1802 1.24 fvdl nd->nd_flag = 0;
1803 1.14 mycroft if (*tl++ != rpc_vers) {
1804 1.14 mycroft nd->nd_repstat = ERPCMISMATCH;
1805 1.14 mycroft nd->nd_procnum = NFSPROC_NOOP;
1806 1.14 mycroft return (0);
1807 1.14 mycroft }
/* Program number: nfs_prog, or nqnfs_prog (which sets the nqnfs flag). */
1808 1.14 mycroft if (*tl != nfs_prog) {
1809 1.24 fvdl if (*tl == nqnfs_prog)
1810 1.14 mycroft nqnfs++;
1811 1.24 fvdl else {
1812 1.14 mycroft nd->nd_repstat = EPROGUNAVAIL;
1813 1.14 mycroft nd->nd_procnum = NFSPROC_NOOP;
1814 1.14 mycroft return (0);
1815 1.14 mycroft }
1816 1.14 mycroft }
1817 1.14 mycroft tl++;
1818 1.24 fvdl nfsvers = fxdr_unsigned(u_int32_t, *tl++);
1819 1.24 fvdl if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
1820 1.24 fvdl (nfsvers != NQNFS_VER3 && nqnfs)) {
1821 1.14 mycroft nd->nd_repstat = EPROGMISMATCH;
1822 1.14 mycroft nd->nd_procnum = NFSPROC_NOOP;
1823 1.14 mycroft return (0);
1824 1.14 mycroft }
1825 1.24 fvdl if (nqnfs)
1826 1.24 fvdl nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
1827 1.24 fvdl else if (nfsvers == NFS_VER3)
1828 1.24 fvdl nd->nd_flag = ND_NFSV3;
1829 1.24 fvdl nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
1830 1.14 mycroft if (nd->nd_procnum == NFSPROC_NULL)
1831 1.14 mycroft return (0);
1832 1.14 mycroft if (nd->nd_procnum >= NFS_NPROCS ||
1833 1.24 fvdl (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
1834 1.24 fvdl (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
1835 1.14 mycroft nd->nd_repstat = EPROCUNAVAIL;
1836 1.14 mycroft nd->nd_procnum = NFSPROC_NOOP;
1837 1.1 cgd return (0);
1838 1.14 mycroft }
/* Requests without ND_NFSV3 have their procedure number remapped through nfsv3_procid[]. */
1839 1.24 fvdl if ((nd->nd_flag & ND_NFSV3) == 0)
1840 1.24 fvdl nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
1841 1.14 mycroft auth_type = *tl++;
1842 1.14 mycroft len = fxdr_unsigned(int, *tl++);
1843 1.14 mycroft if (len < 0 || len > RPCAUTH_MAXSIZ) {
1844 1.14 mycroft m_freem(mrep);
1845 1.14 mycroft return (EBADRPC);
1846 1.14 mycroft }
1847 1.14 mycroft
1848 1.24 fvdl nd->nd_flag &= ~ND_KERBAUTH;
1849 1.14 mycroft /*
1850 1.14 mycroft * Handle auth_unix or auth_kerb.
1851 1.14 mycroft */
1852 1.14 mycroft if (auth_type == rpc_auth_unix) {
1853 1.14 mycroft len = fxdr_unsigned(int, *++tl);
1854 1.14 mycroft if (len < 0 || len > NFS_MAXNAMLEN) {
1855 1.14 mycroft m_freem(mrep);
1856 1.14 mycroft return (EBADRPC);
1857 1.14 mycroft }
1858 1.14 mycroft nfsm_adv(nfsm_rndup(len));
1859 1.24 fvdl nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1860 1.46 perry memset((caddr_t)&nd->nd_cr, 0, sizeof (struct ucred));
1861 1.24 fvdl nd->nd_cr.cr_ref = 1;
1862 1.14 mycroft nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
1863 1.14 mycroft nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
1864 1.14 mycroft len = fxdr_unsigned(int, *tl);
1865 1.14 mycroft if (len < 0 || len > RPCAUTH_UNIXGIDS) {
1866 1.14 mycroft m_freem(mrep);
1867 1.14 mycroft return (EBADRPC);
1868 1.14 mycroft }
1869 1.24 fvdl nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
/* Every group word is stepped over, but only the first NGROUPS are stored. */
1870 1.18 mycroft for (i = 0; i < len; i++)
1871 1.24 fvdl if (i < NGROUPS)
1872 1.24 fvdl nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
1873 1.24 fvdl else
1874 1.24 fvdl tl++;
1875 1.19 mycroft nd->nd_cr.cr_ngroups = (len > NGROUPS) ? NGROUPS : len;
1876 1.24 fvdl if (nd->nd_cr.cr_ngroups > 1)
1877 1.24 fvdl nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
1878 1.24 fvdl len = fxdr_unsigned(int, *++tl);
1879 1.24 fvdl if (len < 0 || len > RPCAUTH_MAXSIZ) {
1880 1.14 mycroft m_freem(mrep);
1881 1.14 mycroft return (EBADRPC);
1882 1.14 mycroft }
1883 1.24 fvdl if (len > 0)
1884 1.24 fvdl nfsm_adv(nfsm_rndup(len));
1885 1.24 fvdl } else if (auth_type == rpc_auth_kerb) {
1886 1.24 fvdl switch (fxdr_unsigned(int, *tl++)) {
1887 1.24 fvdl case RPCAKN_FULLNAME:
1888 1.24 fvdl ticklen = fxdr_unsigned(int, *tl);
1889 1.24 fvdl *((u_int32_t *)nfsd->nfsd_authstr) = *tl;
1890 1.24 fvdl uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
1891 1.24 fvdl nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
1892 1.24 fvdl if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
1893 1.24 fvdl m_freem(mrep);
1894 1.24 fvdl return (EBADRPC);
1895 1.24 fvdl }
1896 1.24 fvdl uio.uio_offset = 0;
1897 1.24 fvdl uio.uio_iov = &iov;
1898 1.24 fvdl uio.uio_iovcnt = 1;
1899 1.24 fvdl uio.uio_segflg = UIO_SYSSPACE;
1900 1.24 fvdl iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
1901 1.24 fvdl iov.iov_len = RPCAUTH_MAXSIZ - 4;
1902 1.24 fvdl nfsm_mtouio(&uio, uio.uio_resid);
1903 1.24 fvdl nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1904 1.24 fvdl if (*tl++ != rpc_auth_kerb ||
1905 1.24 fvdl fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
1906 1.31 christos printf("Bad kerb verifier\n");
1907 1.24 fvdl nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
1908 1.24 fvdl nd->nd_procnum = NFSPROC_NOOP;
1909 1.24 fvdl return (0);
1910 1.24 fvdl }
1911 1.24 fvdl nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
1912 1.24 fvdl tl = (u_int32_t *)cp;
1913 1.24 fvdl if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
1914 1.31 christos printf("Not fullname kerb verifier\n");
1915 1.24 fvdl nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
1916 1.24 fvdl nd->nd_procnum = NFSPROC_NOOP;
1917 1.24 fvdl return (0);
1918 1.24 fvdl }
1919 1.24 fvdl cp += NFSX_UNSIGNED;
1920 1.46 perry memcpy(nfsd->nfsd_verfstr, cp, 3 * NFSX_UNSIGNED);
1921 1.24 fvdl nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
1922 1.24 fvdl nd->nd_flag |= ND_KERBFULL;
1923 1.24 fvdl nfsd->nfsd_flag |= NFSD_NEEDAUTH;
1924 1.24 fvdl break;
1925 1.24 fvdl case RPCAKN_NICKNAME:
1926 1.24 fvdl if (len != 2 * NFSX_UNSIGNED) {
1927 1.31 christos printf("Kerb nickname short\n");
1928 1.24 fvdl nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
1929 1.24 fvdl nd->nd_procnum = NFSPROC_NOOP;
1930 1.24 fvdl return (0);
1931 1.24 fvdl }
1932 1.24 fvdl nickuid = fxdr_unsigned(uid_t, *tl);
1933 1.24 fvdl nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1934 1.24 fvdl if (*tl++ != rpc_auth_kerb ||
1935 1.24 fvdl fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
1936 1.31 christos printf("Kerb nick verifier bad\n");
1937 1.24 fvdl nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
1938 1.24 fvdl nd->nd_procnum = NFSPROC_NOOP;
1939 1.24 fvdl return (0);
1940 1.24 fvdl }
1941 1.24 fvdl nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1942 1.24 fvdl tvin.tv_sec = *tl++;
1943 1.24 fvdl tvin.tv_usec = *tl;
1944 1.24 fvdl
/* Look for a cached nfsuid entry with this uid (and, when nd_nam2 is set, a matching address). */
1945 1.80 yamt LIST_FOREACH(nuidp, NUIDHASH(nfsd->nfsd_slp, nickuid),
1946 1.80 yamt nu_hash) {
1947 1.24 fvdl if (nuidp->nu_cr.cr_uid == nickuid &&
1948 1.24 fvdl (!nd->nd_nam2 ||
1949 1.24 fvdl netaddr_match(NU_NETFAM(nuidp),
1950 1.24 fvdl &nuidp->nu_haddr, nd->nd_nam2)))
1951 1.24 fvdl break;
1952 1.24 fvdl }
1953 1.24 fvdl if (!nuidp) {
1954 1.24 fvdl nd->nd_repstat =
1955 1.24 fvdl (NFSERR_AUTHERR|AUTH_REJECTCRED);
1956 1.24 fvdl nd->nd_procnum = NFSPROC_NOOP;
1957 1.24 fvdl return (0);
1958 1.24 fvdl }
1959 1.24 fvdl
1960 1.24 fvdl /*
1961 1.24 fvdl * Now, decrypt the timestamp using the session key
1962 1.24 fvdl * and validate it.
1963 1.24 fvdl */
1964 1.24 fvdl #ifdef NFSKERB
1965 1.24 fvdl XXX
1966 1.24 fvdl #endif
1967 1.14 mycroft
/*
 * NOTE(review): nothing visible in this function assigns tvout before it
 * is read below, and tvin is assigned above but not used afterwards.  The
 * decryption step that would connect them is only the placeholder in the
 * #ifdef NFSKERB block -- confirm before relying on this timestamp check.
 */
1968 1.24 fvdl tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
1969 1.24 fvdl tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
1970 1.24 fvdl if (nuidp->nu_expire < time.tv_sec ||
1971 1.24 fvdl nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
1972 1.24 fvdl (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
1973 1.24 fvdl nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
1974 1.24 fvdl nuidp->nu_expire = 0;
1975 1.24 fvdl nd->nd_repstat =
1976 1.24 fvdl (NFSERR_AUTHERR|AUTH_REJECTVERF);
1977 1.24 fvdl nd->nd_procnum = NFSPROC_NOOP;
1978 1.24 fvdl return (0);
1979 1.24 fvdl }
1980 1.24 fvdl nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
1981 1.24 fvdl nd->nd_flag |= ND_KERBNICK;
/* NOTE(review): the switch has no default case; any other kerb credential kind falls out of it without being rejected here. */
1982 1.24 fvdl };
1983 1.24 fvdl } else {
1984 1.24 fvdl nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
1985 1.24 fvdl nd->nd_procnum = NFSPROC_NOOP;
1986 1.24 fvdl return (0);
1987 1.14 mycroft }
1988 1.14 mycroft
1989 1.14 mycroft /*
1990 1.14 mycroft * For nqnfs, get piggybacked lease request.
1991 1.14 mycroft */
1992 1.14 mycroft if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
1993 1.22 cgd nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1994 1.24 fvdl nd->nd_flag |= fxdr_unsigned(int, *tl);
1995 1.24 fvdl if (nd->nd_flag & ND_LEASE) {
1996 1.22 cgd nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1997 1.24 fvdl nd->nd_duration = fxdr_unsigned(u_int32_t, *tl);
1998 1.14 mycroft } else
1999 1.14 mycroft nd->nd_duration = NQ_MINLEASE;
2000 1.24 fvdl } else
2001 1.14 mycroft nd->nd_duration = NQ_MINLEASE;
/* Success: hand the advanced parse position back to the caller. */
2002 1.14 mycroft nd->nd_md = md;
2003 1.14 mycroft nd->nd_dpos = dpos;
2004 1.14 mycroft return (0);
2005 1.14 mycroft nfsmout:
2006 1.14 mycroft return (error);
2007 1.1 cgd }
2008 1.1 cgd
2009 1.24 fvdl int
2010 1.92 fvdl nfs_msg(p, server, msg)
2011 1.92 fvdl struct proc *p;
2012 1.1 cgd char *server, *msg;
2013 1.1 cgd {
2014 1.1 cgd tpr_t tpr;
2015 1.1 cgd
2016 1.92 fvdl if (p)
2017 1.92 fvdl tpr = tprintf_open(p);
2018 1.1 cgd else
2019 1.1 cgd tpr = NULL;
2020 1.1 cgd tprintf(tpr, "nfs server %s: %s\n", server, msg);
2021 1.1 cgd tprintf_close(tpr);
2022 1.24 fvdl return (0);
2023 1.1 cgd }
2024 1.1 cgd
2025 1.14 mycroft #ifdef NFSSERVER
/*
 * Table of server-side request handlers, NFS_NPROCS slots long; 26
 * handlers are listed below.  Each handler takes the request descriptor,
 * the server socket, a process and an mbuf ** and returns int.
 *
 * NOTE(review): the table is presumably indexed by nd_procnum after the
 * remapping done in nfs_getreq(), so the order of the entries must follow
 * the procedure numbering -- confirm against the dispatch site before
 * reordering or inserting entries.  The last two slots are nfsrv_noop.
 */
2026 1.24 fvdl int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *,
2027 1.92 fvdl struct nfssvc_sock *, struct proc *,
2028 1.23 christos struct mbuf **)) = {
2029 1.14 mycroft nfsrv_null,
2030 1.14 mycroft nfsrv_getattr,
2031 1.14 mycroft nfsrv_setattr,
2032 1.14 mycroft nfsrv_lookup,
2033 1.24 fvdl nfsrv3_access,
2034 1.14 mycroft nfsrv_readlink,
2035 1.14 mycroft nfsrv_read,
2036 1.14 mycroft nfsrv_write,
2037 1.14 mycroft nfsrv_create,
2038 1.24 fvdl nfsrv_mkdir,
2039 1.24 fvdl nfsrv_symlink,
2040 1.24 fvdl nfsrv_mknod,
2041 1.14 mycroft nfsrv_remove,
2042 1.24 fvdl nfsrv_rmdir,
2043 1.14 mycroft nfsrv_rename,
2044 1.14 mycroft nfsrv_link,
2045 1.14 mycroft nfsrv_readdir,
2046 1.24 fvdl nfsrv_readdirplus,
2047 1.14 mycroft nfsrv_statfs,
2048 1.24 fvdl nfsrv_fsinfo,
2049 1.24 fvdl nfsrv_pathconf,
2050 1.24 fvdl nfsrv_commit,
/* Handlers with the nqnfsrv_ prefix, followed by two no-op slots. */
2051 1.14 mycroft nqnfsrv_getlease,
2052 1.14 mycroft nqnfsrv_vacated,
2053 1.14 mycroft nfsrv_noop,
2054 1.24 fvdl nfsrv_noop
2055 1.14 mycroft };
2056 1.14 mycroft
2057 1.1 cgd /*
2058 1.14 mycroft * Socket upcall routine for the nfsd sockets.
2059 1.14 mycroft * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2060 1.14 mycroft * Essentially do as much as possible non-blocking, else punt and it will
2061 1.14 mycroft * be called with M_WAIT from an nfsd.
2062 1.1 cgd */
2063 1.14 mycroft void
2064 1.14 mycroft nfsrv_rcv(so, arg, waitflag)
2065 1.14 mycroft struct socket *so;
2066 1.14 mycroft caddr_t arg;
2067 1.14 mycroft int waitflag;
2068 1.1 cgd {
2069 1.55 augustss struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2070 1.55 augustss struct mbuf *m;
2071 1.14 mycroft struct mbuf *mp, *nam;
2072 1.14 mycroft struct uio auio;
2073 1.14 mycroft int flags, error;
2074 1.1 cgd
2075 1.14 mycroft if ((slp->ns_flag & SLP_VALID) == 0)
2076 1.14 mycroft return;
2077 1.14 mycroft #ifdef notdef
2078 1.14 mycroft /*
2079 1.14 mycroft * Define this to test for nfsds handling this under heavy load.
2080 1.14 mycroft */
2081 1.14 mycroft if (waitflag == M_DONTWAIT) {
2082 1.14 mycroft slp->ns_flag |= SLP_NEEDQ; goto dorecs;
2083 1.1 cgd }
2084 1.14 mycroft #endif
2085 1.92 fvdl auio.uio_procp = NULL;
2086 1.14 mycroft if (so->so_type == SOCK_STREAM) {
2087 1.14 mycroft /*
2088 1.14 mycroft * If there are already records on the queue, defer soreceive()
2089 1.14 mycroft * to an nfsd so that there is feedback to the TCP layer that
2090 1.14 mycroft * the nfs servers are heavily loaded.
2091 1.14 mycroft */
2092 1.14 mycroft if (slp->ns_rec && waitflag == M_DONTWAIT) {
2093 1.14 mycroft slp->ns_flag |= SLP_NEEDQ;
2094 1.14 mycroft goto dorecs;
2095 1.14 mycroft }
2096 1.14 mycroft
2097 1.14 mycroft /*
2098 1.14 mycroft * Do soreceive().
2099 1.14 mycroft */
2100 1.14 mycroft auio.uio_resid = 1000000000;
2101 1.14 mycroft flags = MSG_DONTWAIT;
2102 1.43 matt error = (*so->so_receive)(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
2103 1.14 mycroft if (error || mp == (struct mbuf *)0) {
2104 1.14 mycroft if (error == EWOULDBLOCK)
2105 1.14 mycroft slp->ns_flag |= SLP_NEEDQ;
2106 1.14 mycroft else
2107 1.14 mycroft slp->ns_flag |= SLP_DISCONN;
2108 1.14 mycroft goto dorecs;
2109 1.14 mycroft }
2110 1.14 mycroft m = mp;
2111 1.14 mycroft if (slp->ns_rawend) {
2112 1.14 mycroft slp->ns_rawend->m_next = m;
2113 1.14 mycroft slp->ns_cc += 1000000000 - auio.uio_resid;
2114 1.14 mycroft } else {
2115 1.14 mycroft slp->ns_raw = m;
2116 1.14 mycroft slp->ns_cc = 1000000000 - auio.uio_resid;
2117 1.14 mycroft }
2118 1.14 mycroft while (m->m_next)
2119 1.14 mycroft m = m->m_next;
2120 1.14 mycroft slp->ns_rawend = m;
2121 1.14 mycroft
2122 1.14 mycroft /*
2123 1.14 mycroft * Now try and parse record(s) out of the raw stream data.
2124 1.14 mycroft */
2125 1.24 fvdl error = nfsrv_getstream(slp, waitflag);
2126 1.24 fvdl if (error) {
2127 1.14 mycroft if (error == EPERM)
2128 1.14 mycroft slp->ns_flag |= SLP_DISCONN;
2129 1.14 mycroft else
2130 1.14 mycroft slp->ns_flag |= SLP_NEEDQ;
2131 1.14 mycroft }
2132 1.14 mycroft } else {
2133 1.14 mycroft do {
2134 1.14 mycroft auio.uio_resid = 1000000000;
2135 1.14 mycroft flags = MSG_DONTWAIT;
2136 1.43 matt error = (*so->so_receive)(so, &nam, &auio, &mp,
2137 1.14 mycroft (struct mbuf **)0, &flags);
2138 1.14 mycroft if (mp) {
2139 1.14 mycroft if (nam) {
2140 1.14 mycroft m = nam;
2141 1.14 mycroft m->m_next = mp;
2142 1.14 mycroft } else
2143 1.14 mycroft m = mp;
2144 1.14 mycroft if (slp->ns_recend)
2145 1.14 mycroft slp->ns_recend->m_nextpkt = m;
2146 1.14 mycroft else
2147 1.14 mycroft slp->ns_rec = m;
2148 1.14 mycroft slp->ns_recend = m;
2149 1.14 mycroft m->m_nextpkt = (struct mbuf *)0;
2150 1.14 mycroft }
2151 1.14 mycroft if (error) {
2152 1.14 mycroft if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
2153 1.14 mycroft && error != EWOULDBLOCK) {
2154 1.14 mycroft slp->ns_flag |= SLP_DISCONN;
2155 1.14 mycroft goto dorecs;
2156 1.14 mycroft }
2157 1.14 mycroft }
2158 1.14 mycroft } while (mp);
2159 1.14 mycroft }
2160 1.14 mycroft
2161 1.14 mycroft /*
2162 1.14 mycroft * Now try and process the request records, non-blocking.
2163 1.14 mycroft */
2164 1.14 mycroft dorecs:
2165 1.14 mycroft if (waitflag == M_DONTWAIT &&
2166 1.14 mycroft (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
2167 1.14 mycroft nfsrv_wakenfsd(slp);
2168 1.1 cgd }
2169 1.1 cgd
2170 1.1 cgd /*
2171 1.14 mycroft * Try and extract an RPC request from the mbuf data list received on a
2172 1.14 mycroft * stream socket. The "waitflag" argument indicates whether or not it
2173 1.14 mycroft * can sleep.
2174 1.14 mycroft */
2175 1.23 christos int
2176 1.14 mycroft nfsrv_getstream(slp, waitflag)
2177 1.55 augustss struct nfssvc_sock *slp;
2178 1.14 mycroft int waitflag;
2179 1.1 cgd {
2180 1.55 augustss struct mbuf *m, **mpp;
2181 1.81 yamt struct mbuf *recm;
2182 1.24 fvdl u_int32_t recmark;
2183 1.1 cgd
2184 1.14 mycroft if (slp->ns_flag & SLP_GETSTREAM)
2185 1.14 mycroft panic("nfs getstream");
2186 1.14 mycroft slp->ns_flag |= SLP_GETSTREAM;
2187 1.14 mycroft for (;;) {
2188 1.82 yamt if (slp->ns_reclen == 0) {
2189 1.82 yamt if (slp->ns_cc < NFSX_UNSIGNED) {
2190 1.82 yamt slp->ns_flag &= ~SLP_GETSTREAM;
2191 1.82 yamt return (0);
2192 1.82 yamt }
2193 1.82 yamt m = slp->ns_raw;
2194 1.82 yamt m_copydata(m, 0, NFSX_UNSIGNED, (caddr_t)&recmark);
2195 1.82 yamt m_adj(m, NFSX_UNSIGNED);
2196 1.82 yamt slp->ns_cc -= NFSX_UNSIGNED;
2197 1.82 yamt recmark = ntohl(recmark);
2198 1.82 yamt slp->ns_reclen = recmark & ~0x80000000;
2199 1.82 yamt if (recmark & 0x80000000)
2200 1.82 yamt slp->ns_flag |= SLP_LASTFRAG;
2201 1.82 yamt else
2202 1.82 yamt slp->ns_flag &= ~SLP_LASTFRAG;
2203 1.82 yamt if (slp->ns_reclen > NFS_MAXPACKET) {
2204 1.82 yamt slp->ns_flag &= ~SLP_GETSTREAM;
2205 1.82 yamt return (EPERM);
2206 1.82 yamt }
2207 1.82 yamt }
2208 1.82 yamt
2209 1.82 yamt /*
2210 1.82 yamt * Now get the record part.
2211 1.82 yamt *
2212 1.82 yamt * Note that slp->ns_reclen may be 0. Linux sometimes
2213 1.82 yamt * generates 0-length records.
2214 1.82 yamt */
2215 1.82 yamt if (slp->ns_cc == slp->ns_reclen) {
2216 1.82 yamt recm = slp->ns_raw;
2217 1.82 yamt slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
2218 1.82 yamt slp->ns_cc = slp->ns_reclen = 0;
2219 1.82 yamt } else if (slp->ns_cc > slp->ns_reclen) {
2220 1.82 yamt recm = slp->ns_raw;
2221 1.82 yamt m = m_split(recm, slp->ns_reclen, waitflag);
2222 1.82 yamt if (m == NULL) {
2223 1.82 yamt slp->ns_flag &= ~SLP_GETSTREAM;
2224 1.82 yamt return (EWOULDBLOCK);
2225 1.82 yamt }
2226 1.102.2.2 tron m_claimm(recm, &nfs_mowner);
2227 1.82 yamt slp->ns_raw = m;
2228 1.82 yamt if (m->m_next == NULL)
2229 1.82 yamt slp->ns_rawend = m;
2230 1.82 yamt slp->ns_cc -= slp->ns_reclen;
2231 1.82 yamt slp->ns_reclen = 0;
2232 1.82 yamt } else {
2233 1.14 mycroft slp->ns_flag &= ~SLP_GETSTREAM;
2234 1.14 mycroft return (0);
2235 1.14 mycroft }
2236 1.14 mycroft
2237 1.82 yamt /*
2238 1.82 yamt * Accumulate the fragments into a record.
2239 1.82 yamt */
2240 1.82 yamt mpp = &slp->ns_frag;
2241 1.82 yamt while (*mpp)
2242 1.82 yamt mpp = &((*mpp)->m_next);
2243 1.82 yamt *mpp = recm;
2244 1.82 yamt if (slp->ns_flag & SLP_LASTFRAG) {
2245 1.82 yamt if (slp->ns_recend)
2246 1.82 yamt slp->ns_recend->m_nextpkt = slp->ns_frag;
2247 1.82 yamt else
2248 1.82 yamt slp->ns_rec = slp->ns_frag;
2249 1.82 yamt slp->ns_recend = slp->ns_frag;
2250 1.82 yamt slp->ns_frag = (struct mbuf *)0;
2251 1.14 mycroft }
2252 1.1 cgd }
2253 1.1 cgd }
2254 1.1 cgd
2255 1.1 cgd /*
2256 1.14 mycroft * Parse an RPC header.
2257 1.14 mycroft */
2258 1.23 christos int
2259 1.24 fvdl nfsrv_dorec(slp, nfsd, ndp)
2260 1.55 augustss struct nfssvc_sock *slp;
2261 1.24 fvdl struct nfsd *nfsd;
2262 1.24 fvdl struct nfsrv_descript **ndp;
2263 1.14 mycroft {
2264 1.55 augustss struct mbuf *m, *nam;
2265 1.55 augustss struct nfsrv_descript *nd;
2266 1.14 mycroft int error;
2267 1.1 cgd
2268 1.24 fvdl *ndp = NULL;
2269 1.14 mycroft if ((slp->ns_flag & SLP_VALID) == 0 ||
2270 1.14 mycroft (m = slp->ns_rec) == (struct mbuf *)0)
2271 1.14 mycroft return (ENOBUFS);
2272 1.24 fvdl slp->ns_rec = m->m_nextpkt;
2273 1.24 fvdl if (slp->ns_rec)
2274 1.14 mycroft m->m_nextpkt = (struct mbuf *)0;
2275 1.14 mycroft else
2276 1.14 mycroft slp->ns_recend = (struct mbuf *)0;
2277 1.14 mycroft if (m->m_type == MT_SONAME) {
2278 1.24 fvdl nam = m;
2279 1.24 fvdl m = m->m_next;
2280 1.24 fvdl nam->m_next = NULL;
2281 1.24 fvdl } else
2282 1.24 fvdl nam = NULL;
2283 1.88 yamt nd = pool_get(&nfs_srvdesc_pool, PR_WAITOK);
2284 1.24 fvdl nd->nd_md = nd->nd_mrep = m;
2285 1.24 fvdl nd->nd_nam2 = nam;
2286 1.24 fvdl nd->nd_dpos = mtod(m, caddr_t);
2287 1.24 fvdl error = nfs_getreq(nd, nfsd, TRUE);
2288 1.24 fvdl if (error) {
2289 1.24 fvdl m_freem(nam);
2290 1.88 yamt pool_put(&nfs_srvdesc_pool, nd);
2291 1.14 mycroft return (error);
2292 1.14 mycroft }
2293 1.24 fvdl *ndp = nd;
2294 1.24 fvdl nfsd->nfsd_nd = nd;
2295 1.1 cgd return (0);
2296 1.1 cgd }
2297 1.1 cgd
2298 1.24 fvdl
2299 1.1 cgd /*
2300 1.14 mycroft * Search for a sleeping nfsd and wake it up.
2301 1.14 mycroft * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2302 1.14 mycroft * running nfsds will go look for the work in the nfssvc_sock list.
2303 1.14 mycroft */
2304 1.14 mycroft void
2305 1.14 mycroft nfsrv_wakenfsd(slp)
2306 1.14 mycroft struct nfssvc_sock *slp;
2307 1.14 mycroft {
2308 1.55 augustss struct nfsd *nd;
2309 1.14 mycroft
2310 1.14 mycroft if ((slp->ns_flag & SLP_VALID) == 0)
2311 1.14 mycroft return;
2312 1.90 yamt simple_lock(&nfsd_slock);
2313 1.90 yamt if (slp->ns_flag & SLP_DOREC) {
2314 1.90 yamt simple_unlock(&nfsd_slock);
2315 1.90 yamt return;
2316 1.90 yamt }
2317 1.90 yamt nd = SLIST_FIRST(&nfsd_idle_head);
2318 1.90 yamt if (nd) {
2319 1.90 yamt SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle);
2320 1.90 yamt simple_unlock(&nfsd_slock);
2321 1.90 yamt
2322 1.90 yamt KASSERT(nd->nfsd_flag & NFSD_WAITING);
2323 1.90 yamt nd->nfsd_flag &= ~NFSD_WAITING;
2324 1.90 yamt if (nd->nfsd_slp)
2325 1.90 yamt panic("nfsd wakeup");
2326 1.90 yamt slp->ns_sref++;
2327 1.90 yamt nd->nfsd_slp = slp;
2328 1.90 yamt wakeup(nd);
2329 1.90 yamt return;
2330 1.14 mycroft }
2331 1.14 mycroft slp->ns_flag |= SLP_DOREC;
2332 1.17 mycroft nfsd_head_flag |= NFSD_CHECKSLP;
2333 1.90 yamt TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending);
2334 1.90 yamt simple_unlock(&nfsd_slock);
2335 1.1 cgd }
2336 1.14 mycroft #endif /* NFSSERVER */
2337