/*	$NetBSD: nfs_socket.c,v 1.16 1994/08/17 11:41:42 mycroft Exp $	*/
2
3 /*
4 * Copyright (c) 1989, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94
39 */
40
41 /*
42 * Socket operations for use by nfs
43 */
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/mount.h>
49 #include <sys/kernel.h>
50 #include <sys/mbuf.h>
51 #include <sys/vnode.h>
52 #include <sys/domain.h>
53 #include <sys/protosw.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/syslog.h>
57 #include <sys/tprintf.h>
58
59 #include <netinet/in.h>
60 #include <netinet/tcp.h>
61 #include <nfs/rpcv2.h>
62 #include <nfs/nfsv2.h>
63 #include <nfs/nfs.h>
64 #include <nfs/xdr_subs.h>
65 #include <nfs/nfsm_subs.h>
66 #include <nfs/nfsmount.h>
67 #include <nfs/nfsnode.h>
68 #include <nfs/nfsrtt.h>
69 #include <nfs/nqnfs.h>
70
71 #define TRUE 1
72 #define FALSE 0
73
74 /*
75 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
76 * Use the mean and mean deviation of rtt for the appropriate type of rpc
77 * for the frequent rpcs and a default for the others.
78 * The justification for doing "other" this way is that these rpcs
79 * happen so infrequently that timer est. would probably be stale.
80 * Also, since many of these rpcs are
81 * non-idempotent, a conservative timeout is desired.
82 * getattr, lookup - A+2D
83 * read, write - A+4D
84 * other - nm_timeo
85 */
86 #define NFS_RTO(n, t) \
87 ((t) == 0 ? (n)->nm_timeo : \
88 ((t) < 3 ? \
89 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
90 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
91 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
92 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
93 /*
94 * External data, mostly RPC constants in XDR form
95 */
96 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
97 rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
98 rpc_auth_kerb;
99 extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
100 extern time_t nqnfsstarttime;
101 extern int nonidempotent[NFS_NPROCS];
102
/*
 * Maps errno values to nfs error numbers.
 * Use NFSERR_IO as the catch all for ones not specifically defined in
 * RFC 1094.
 *
 * Indexed by (errno - 1); see the nfsrv_errmap[err - 1] lookup in
 * nfs_rephead().  Sized by ELAST so every in-range errno has a slot.
 */
static int nfsrv_errmap[ELAST] = {
	NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,
	NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,
	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,
	NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,
	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
	NFSERR_IO,
};
127
/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 *
 * Indexed by RPC procedure number; a nonzero entry selects the
 * per-class SRTT/SDRTT slot (entry - 1) used by NFS_SRTT()/NFS_SDRTT()
 * and enables round-trip timing (R_TIMING) in nfs_request().
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
};
139
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
/* Multipliers applied to trylater_delay, indexed by trylater_cnt (0..7). */
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
/* Forward declarations (pre-ANSI: no prototypes, arguments unchecked). */
int	nfs_sbwait();
void	nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
void	nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
struct mbuf *nfsm_rpchead();
int nfsrtton = 0;		/* nonzero enables logging replies into nfsrtt */
struct nfsrtt nfsrtt;		/* circular round-trip-time log (see nfs_reply) */
struct nfsd nfsd_head;
163
/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * Creates the socket described by the mount point (nm_sotype/nm_soproto),
 * optionally binds it to a reserved port (NFSMNT_RESVPORT), connects it
 * unless NFSMNT_NOCONN, sets buffer reservations/timeouts and seeds the
 * RTT and congestion-window estimators.  Returns 0 or an errno; on error
 * any partially set up socket is torn down via nfs_disconnect().
 */
nfs_connect(nmp, rep)
	register struct nfsmount *nmp;
	struct nfsreq *rep;
{
	register struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct mbuf *m;
	u_short tport;

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	/* Pre-ANSI idiom throughout: assignment inside 'if' tests the errno. */
	if (error = socreate(saddr->sa_family,
	    &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
		goto bad;
	so = nmp->nm_so;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 * Walk down from IPPORT_RESERVED-1, giving up at IPPORT_RESERVED/2.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		MGET(m, M_WAIT, MT_SONAME);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin->sin_port = htons(tport);
		while ((error = sobind(so, m)) == EADDRINUSE &&
		    --tport > IPPORT_RESERVED / 2)
			sin->sin_port = htons(tport);
		m_freem(m);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		if (error = soconnect(so, nmp->nm_nam))
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
			    "nfscon", 2 * hz);
			/* Recheck after each 2s tick so signals can abort. */
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	/* Soft/interruptible mounts get 5 second socket buffer timeouts. */
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}
	/*
	 * Reserve buffer space: one max-size packet for datagrams, two for
	 * seqpacket/stream; streams also get room for the record mark word
	 * and SO_KEEPALIVE / TCP_NODELAY options set.
	 */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
		    * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
		    * 2;
	}
	if (error = soreserve(so, sndreserve, rcvreserve))
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
	    nmp->nm_srtt[4] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
	    nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}
296
297 /*
298 * Reconnect routine:
299 * Called when a connection is broken on a reliable protocol.
300 * - clean up the old socket
301 * - nfs_connect() again
302 * - set R_MUSTRESEND for all outstanding requests on mount point
303 * If this fails the mount point is DEAD!
304 * nb: Must be called with the nfs_sndlock() set on the mount point.
305 */
306 nfs_reconnect(rep)
307 register struct nfsreq *rep;
308 {
309 register struct nfsreq *rp;
310 register struct nfsmount *nmp = rep->r_nmp;
311 int error;
312
313 nfs_disconnect(nmp);
314 while (error = nfs_connect(nmp, rep)) {
315 if (error == EINTR || error == ERESTART)
316 return (EINTR);
317 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
318 }
319
320 /*
321 * Loop through outstanding request list and fix up all requests
322 * on old socket.
323 */
324 for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
325 if (rp->r_nmp == nmp)
326 rp->r_flags |= R_MUSTRESEND;
327 }
328 return (0);
329 }
330
331 /*
332 * NFS disconnect. Clean up and unlink.
333 */
334 void
335 nfs_disconnect(nmp)
336 register struct nfsmount *nmp;
337 {
338 register struct socket *so;
339
340 if (nmp->nm_so) {
341 so = nmp->nm_so;
342 nmp->nm_so = (struct socket *)0;
343 soshutdown(so, 2);
344 soclose(so);
345 }
346 }
347
348 /*
349 * This is the nfs send routine. For connection based socket types, it
350 * must be called with an nfs_sndlock() on the socket.
351 * "rep == NULL" indicates that it has been called from a server.
352 * For the client side:
353 * - return EINTR if the RPC is terminated, 0 otherwise
354 * - set R_MUSTRESEND if the send fails for any reason
355 * - do any cleanup required by recoverable socket errors (???)
356 * For the server side:
357 * - return EINTR or ERESTART if interrupted by a signal
358 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
359 * - do any cleanup required by recoverable socket errors (???)
360 */
361 nfs_send(so, nam, top, rep)
362 register struct socket *so;
363 struct mbuf *nam;
364 register struct mbuf *top;
365 struct nfsreq *rep;
366 {
367 struct mbuf *sendnam;
368 int error, soflags, flags;
369
370 if (rep) {
371 if (rep->r_flags & R_SOFTTERM) {
372 m_freem(top);
373 return (EINTR);
374 }
375 if ((so = rep->r_nmp->nm_so) == NULL) {
376 rep->r_flags |= R_MUSTRESEND;
377 m_freem(top);
378 return (0);
379 }
380 rep->r_flags &= ~R_MUSTRESEND;
381 soflags = rep->r_nmp->nm_soflags;
382 } else
383 soflags = so->so_proto->pr_flags;
384 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
385 sendnam = (struct mbuf *)0;
386 else
387 sendnam = nam;
388 if (so->so_type == SOCK_SEQPACKET)
389 flags = MSG_EOR;
390 else
391 flags = 0;
392
393 error = sosend(so, sendnam, (struct uio *)0, top,
394 (struct mbuf *)0, flags);
395 if (error) {
396 if (rep) {
397 log(LOG_INFO, "nfs send error %d for server %s\n",error,
398 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
399 /*
400 * Deal with errors for the client side.
401 */
402 if (rep->r_flags & R_SOFTTERM)
403 error = EINTR;
404 else
405 rep->r_flags |= R_MUSTRESEND;
406 } else
407 log(LOG_INFO, "nfsd send error %d\n", error);
408
409 /*
410 * Handle any recoverable (soft) socket errors here. (???)
411 */
412 if (error != EINTR && error != ERESTART &&
413 error != EWOULDBLOCK && error != EPIPE)
414 error = 0;
415 }
416 return (error);
417 }
418
419 #ifdef NFSCLIENT
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *	small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 *
 * On success *mp is the received record and *aname the sender's address
 * (datagram sockets only); both are NULL on error.  Returns 0 or errno.
 */
nfs_receive(rep, aname, mp)
	register struct nfsreq *rep;
	struct mbuf **aname;
	struct mbuf **mp;
{
	register struct socket *so;
	struct uio auio;
	struct iovec aio;
	register struct mbuf *m;
	struct mbuf *control;
	u_long len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(&rep->r_nmp->nm_flag);
			return (EINTR);
		}
		if ((so = rep->r_nmp->nm_so) == NULL) {
			if (error = nfs_reconnect(rep)) {
				nfs_sndunlock(&rep->r_nmp->nm_flag);
				return (error);
			}
			goto tryagain;
		}
		/* Retransmit a copy of the request until the send sticks. */
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep))) {
					nfs_sndunlock(&rep->r_nmp->nm_flag);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(&rep->r_nmp->nm_flag);
		if (sotype == SOCK_STREAM) {
			/*
			 * Read the 4-byte RPC record mark first, with
			 * MSG_WAITALL so we never leave a partial word
			 * in the stream.
			 */
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_long);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_long);
			auio.uio_procp = p;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0, &auio,
				    (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from nfs server %s\n",
				    sizeof(u_long) - auio.uio_resid,
				    sizeof(u_long),
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			/* Strip the high "last fragment" bit of the record mark. */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			/* Now pull in exactly the record body. */
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, (struct mbuf **)0, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from nfs server %s\n",
				    len - auio.uio_resid, len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			auio.uio_procp = p;
			do {
				rcvflg = 0;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
			    (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		/* On a hard receive error, reconnect and start over. */
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
		}
	} else {
		/* Datagram case: a single soreceive() gets a whole record. */
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct mbuf **)0;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		auio.uio_procp = p;
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp,
			    (struct mbuf **)0, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
	}
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
	return (error);
}
628
/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 *
 * Returns 0 once myrep->r_mrep has been filled in (possibly by another
 * receiver), or an errno.  As a side effect, any reply received for a
 * different outstanding request is delivered to that request, and the
 * congestion window / RTT estimators are updated.
 */
/* ARGSUSED */
nfs_reply(myrep)
	struct nfsreq *myrep;
{
	register struct nfsreq *rep;
	register struct nfsmount *nmp = myrep->r_nmp;
	register long t1;
	struct mbuf *mrep, *nam, *md;
	u_long rxid, *tl;
	caddr_t dpos, cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		if (error = nfs_rcvlock(myrep))
			return (error);
		/* Already received, bye bye */
		if (myrep->r_mrep != NULL) {
			nfs_rcvunlock(&nmp->nm_flag);
			return (0);
		}
		/*
		 * Get the next Rpc reply off the socket
		 */
		error = nfs_receive(myrep, &nam, &mrep);
		nfs_rcvunlock(&nmp->nm_flag);
		if (error) {

			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
				if (myrep->r_flags & R_GETONEREP)
					return (0);
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 * (nfsm_dissect jumps to nfsmout on a malformed packet).
		 */
		md = mrep;
		dpos = mtod(md, caddr_t);
		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			/* Not a reply: maybe an NQNFS server callback. */
			if (nmp->nm_flag & NFSMNT_NQNFS) {
				if (nqnfs_callback(nmp, mrep, md, dpos))
					nfsstats.rpcinvalid++;
			} else {
				nfsstats.rpcinvalid++;
				m_freem(mrep);
			}
nfsmout:
			if (myrep->r_flags & R_GETONEREP)
				return (0);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		for (rep = nfs_reqq.tqh_first; rep != 0;
		    rep = rep->r_chain.tqe_next) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				/* Optionally log the round trip in nfsrtt. */
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
					rt->tstamp = time;
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					   (NFS_CWNDSCALE * NFS_CWNDSCALE +
					   (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so course, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
				nmp->nm_timeouts = 0;
				break;
			}
		}
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
		if (myrep->r_flags & R_GETONEREP)
			return (0);
	}
}
789
/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 *
 * On success (*mrp, *mdp, *dposp) describe the reply mbuf chain, the
 * current mbuf within it and the current parse position; the caller
 * owns and must free *mrp.  Handles Kerberos re-authentication
 * (kerbauth), NQNFS "try later" backoff (tryagain) and lease piggyback.
 */
nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
	struct vnode *vp;
	struct mbuf *mrest;
	int procnum;
	struct proc *procp;
	struct ucred *cred;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
{
	register struct mbuf *m, *mrep;
	register struct nfsreq *rep;
	register u_long *tl;
	register int i;
	struct nfsmount *nmp;
	struct mbuf *md, *mheadend;
	struct nfsreq *reph;
	struct nfsnode *np;
	time_t reqtime, waituntil;
	caddr_t dpos, cp2;
	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
	u_long xid;
	u_quad_t frev;
	char *auth_str;

	nmp = VFSTONFS(vp->v_mount);
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	/* Total up the length of the request body mbuf chain. */
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		/*
		 * First attempt goes out with AUTH_UNIX; only after the
		 * server rejects the cred (failed_auth) do we fetch a
		 * Kerberos authenticator.
		 */
		if (failed_auth) {
			error = nfs_getauth(nmp, rep, cred, &auth_type,
			    &auth_str, &auth_len);
			if (error) {
				free((caddr_t)rep, M_NFSREQ);
				m_freem(mrest);
				return (error);
			}
		} else {
			auth_type = RPCAUTH_UNIX;
			auth_len = 5 * NFSX_UNSIGNED;
		}
	} else {
		auth_type = RPCAUTH_UNIX;
		if (cred->cr_ngroups < 1)
			panic("nfsreq nogrps");
		/* AUTH_UNIX cred: fixed part plus up to nm_numgrps gids. */
		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
		    nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
		    5 * NFSX_UNSIGNED;
	}
	/* Prepend the RPC header; mrest is consumed into m from here on. */
	m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
	     auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 * High bit marks the last (only) fragment, low 31 bits its length.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_long *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	/* Only procedures with a timer class (proct) are round-trip timed. */
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftclock();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);

	/* Get send time for nqnfs */
	reqtime = time.tv_sec;

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(&nmp->nm_flag, rep);
		if (!error) {
			/* Send a copy; the original is kept for retransmits. */
			m = m_copym(m, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(&nmp->nm_flag);
		}
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			nmp->nm_sent += NFS_CWNDSCALE;
			rep->r_flags |= R_SENT;
		}
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftclock();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
	splx(s);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error) {
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 * (nfsm_dissect jumps to nfsmout on a malformed reply)
	 */
	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (*tl == rpc_rejectedcred && failed_auth == 0) {
				/*
				 * Rejected cred: detach the request body from
				 * the freed header and rebuild with a
				 * Kerberos authenticator.
				 */
				failed_auth++;
				mheadend->m_next = (struct mbuf *)0;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * skip over the auth_verf, someday we may want to cache auth_short's
	 * for nfs_reqhead(), but for now just dump it
	 */
	if (*++tl != 0) {
		i = nfsm_rndup(fxdr_unsigned(long, *tl));
		nfsm_adv(i);
	}
	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		if (*tl != 0) {
			/* Accepted, but the NFS-level status is an error. */
			error = fxdr_unsigned(int, *tl);
			m_freem(mrep);
			if ((nmp->nm_flag & NFSMNT_NQNFS) &&
			    error == NQNFS_TRYLATER) {
				/*
				 * Server asked us to back off: sleep with
				 * exponential backoff, then retransmit.
				 */
				error = 0;
				waituntil = time.tv_sec + trylater_delay;
				while (time.tv_sec < waituntil)
					(void) tsleep((caddr_t)&lbolt,
						PSOCK, "nqnfstry", 0);
				trylater_delay *= nfs_backoff[trylater_cnt];
				if (trylater_cnt < 7)
					trylater_cnt++;
				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if (error == ESTALE)
				cache_purge(vp);
			m_freem(rep->r_mreq);
			free((caddr_t)rep, M_NFSREQ);
			return (error);
		}

		/*
		 * For nqnfs, get any lease in reply
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			if (*tl) {
				np = VTONFS(vp);
				nqlflag = fxdr_unsigned(int, *tl);
				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
				cachable = fxdr_unsigned(int, *tl++);
				reqtime += fxdr_unsigned(int, *tl++);
				/* Only honor the lease if it hasn't already expired. */
				if (reqtime > time.tv_sec) {
					fxdr_hyper(tl, &frev);
					nqnfs_clientlease(nmp, np, nqlflag,
					    cachable, reqtime, frev);
				}
			}
		}
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		m_freem(rep->r_mreq);
		FREE((caddr_t)rep, M_NFSREQ);
		return (0);
	}
	m_freem(mrep);
	m_freem(rep->r_mreq);
	free((caddr_t)rep, M_NFSREQ);
	error = EPROTONOSUPPORT;
nfsmout:
	return (error);
}
1065 #endif /* NFSCLIENT */
1066
1067 /*
1068 * Generate the rpc reply header
1069 * siz arg. is used to decide if adding a cluster is worthwhile
1070 */
1071 nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
1072 int siz;
1073 struct nfsd *nd;
1074 int err;
1075 int cache;
1076 u_quad_t *frev;
1077 struct mbuf **mrq;
1078 struct mbuf **mbp;
1079 caddr_t *bposp;
1080 {
1081 register u_long *tl;
1082 register struct mbuf *mreq;
1083 caddr_t bpos;
1084 struct mbuf *mb, *mb2;
1085
1086 MGETHDR(mreq, M_WAIT, MT_DATA);
1087 mb = mreq;
1088 /*
1089 * If this is a big reply, use a cluster else
1090 * try and leave leading space for the lower level headers.
1091 */
1092 siz += RPC_REPLYSIZ;
1093 if (siz >= MINCLSIZE) {
1094 MCLGET(mreq, M_WAIT);
1095 } else
1096 mreq->m_data += max_hdr;
1097 tl = mtod(mreq, u_long *);
1098 mreq->m_len = 6*NFSX_UNSIGNED;
1099 bpos = ((caddr_t)tl)+mreq->m_len;
1100 *tl++ = txdr_unsigned(nd->nd_retxid);
1101 *tl++ = rpc_reply;
1102 if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
1103 *tl++ = rpc_msgdenied;
1104 if (err == NQNFS_AUTHERR) {
1105 *tl++ = rpc_autherr;
1106 *tl = rpc_rejectedcred;
1107 mreq->m_len -= NFSX_UNSIGNED;
1108 bpos -= NFSX_UNSIGNED;
1109 } else {
1110 *tl++ = rpc_mismatch;
1111 *tl++ = txdr_unsigned(2);
1112 *tl = txdr_unsigned(2);
1113 }
1114 } else {
1115 *tl++ = rpc_msgaccepted;
1116 *tl++ = 0;
1117 *tl++ = 0;
1118 switch (err) {
1119 case EPROGUNAVAIL:
1120 *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1121 break;
1122 case EPROGMISMATCH:
1123 *tl = txdr_unsigned(RPC_PROGMISMATCH);
1124 nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
1125 *tl++ = txdr_unsigned(2);
1126 *tl = txdr_unsigned(2); /* someday 3 */
1127 break;
1128 case EPROCUNAVAIL:
1129 *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1130 break;
1131 default:
1132 *tl = 0;
1133 if (err != VNOVAL) {
1134 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1135 if (err)
1136 *tl = txdr_unsigned(nfsrv_errmap[err - 1]);
1137 else
1138 *tl = 0;
1139 }
1140 break;
1141 };
1142 }
1143
1144 /*
1145 * For nqnfs, piggyback lease as requested.
1146 */
1147 if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
1148 if (nd->nd_nqlflag) {
1149 nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
1150 *tl++ = txdr_unsigned(nd->nd_nqlflag);
1151 *tl++ = txdr_unsigned(cache);
1152 *tl++ = txdr_unsigned(nd->nd_duration);
1153 txdr_hyper(frev, tl);
1154 } else {
1155 if (nd->nd_nqlflag != 0)
1156 panic("nqreph");
1157 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1158 *tl = 0;
1159 }
1160 }
1161 *mrq = mreq;
1162 *mbp = mb;
1163 *bposp = bpos;
1164 if (err != 0 && err != VNOVAL)
1165 nfsstats.srvrpc_errs++;
1166 return (0);
1167 }
1168
1169 /*
1170 * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out
1172 * To avoid retransmission attempts on STREAM sockets (in the future) make
1173 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1174 */
void
nfs_timer(arg)
	void *arg;	/* unused; timeout() callback argument */
{
	register struct nfsreq *rep;
	register struct mbuf *m;
	register struct socket *so;
	register struct nfsmount *nmp;
	register int timeo;
	static long lasttime = 0;	/* last second nqnfs_serverd was run */
	int s, error;

	s = splnet();
	/* Walk every outstanding client request. */
	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
		nmp = rep->r_nmp;
		/* Skip requests already answered or soft-terminated. */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		/* Pending signal on an interruptible mount kills the request. */
		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_rtt >= 0) {
			/* r_rtt >= 0 means the request is being timed. */
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			/* Back off exponentially on repeated mount timeouts. */
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
		}
		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		     rep->r_rexmit > nmp->nm_deadthresh) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		/*
		 * Stream sockets never retransmit here (the transport is
		 * reliable); just clamp the backoff counter.
		 */
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			/* Connected socket needs no destination address. */
			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
			    (struct mbuf *)0, (struct mbuf *)0);
			else
			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
			    nmp->nm_nam, (struct mbuf *)0);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * Iff first send, start timing
				 * else turn timing off, backoff timer
				 * and divide congestion window by 2.
				 */
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_TIMING;
					if (++rep->r_rexmit > NFS_MAXREXMIT)
						rep->r_rexmit = NFS_MAXREXMIT;
					nmp->nm_cwnd >>= 1;
					if (nmp->nm_cwnd < NFS_CWNDSCALE)
						nmp->nm_cwnd = NFS_CWNDSCALE;
					nfsstats.rpcretries++;
				} else {
					rep->r_flags |= R_SENT;
					nmp->nm_sent += NFS_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}

#ifdef NFSSERVER
	/*
	 * Call the nqnfs server timer once a second to handle leases.
	 */
	if (lasttime != time.tv_sec) {
		lasttime = time.tv_sec;
		nqnfs_serverd();
	}
#endif /* NFSSERVER */
	splx(s);
	/* Re-arm ourselves; nfs_timer runs NFS_HZ times a second. */
	timeout(nfs_timer, (void *)0, hz / NFS_HZ);
}
1287
1288 /*
1289 * Test for a termination condition pending on the process.
1290 * This is used for NFSMNT_INT mounts.
1291 */
1292 nfs_sigintr(nmp, rep, p)
1293 struct nfsmount *nmp;
1294 struct nfsreq *rep;
1295 register struct proc *p;
1296 {
1297
1298 if (rep && (rep->r_flags & R_SOFTTERM))
1299 return (EINTR);
1300 if (!(nmp->nm_flag & NFSMNT_INT))
1301 return (0);
1302 if (p && p->p_siglist &&
1303 (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
1304 NFSINT_SIGMASK))
1305 return (EINTR);
1306 return (0);
1307 }
1308
1309 /*
1310 * Lock a socket against others.
1311 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1312 * and also to avoid race conditions between the processes with nfs requests
1313 * in progress when a reconnect is necessary.
1314 */
1315 nfs_sndlock(flagp, rep)
1316 register int *flagp;
1317 struct nfsreq *rep;
1318 {
1319 struct proc *p;
1320 int slpflag = 0, slptimeo = 0;
1321
1322 if (rep) {
1323 p = rep->r_procp;
1324 if (rep->r_nmp->nm_flag & NFSMNT_INT)
1325 slpflag = PCATCH;
1326 } else
1327 p = (struct proc *)0;
1328 while (*flagp & NFSMNT_SNDLOCK) {
1329 if (nfs_sigintr(rep->r_nmp, rep, p))
1330 return (EINTR);
1331 *flagp |= NFSMNT_WANTSND;
1332 (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
1333 slptimeo);
1334 if (slpflag == PCATCH) {
1335 slpflag = 0;
1336 slptimeo = 2 * hz;
1337 }
1338 }
1339 *flagp |= NFSMNT_SNDLOCK;
1340 return (0);
1341 }
1342
1343 /*
1344 * Unlock the stream socket for others.
1345 */
1346 void
1347 nfs_sndunlock(flagp)
1348 register int *flagp;
1349 {
1350
1351 if ((*flagp & NFSMNT_SNDLOCK) == 0)
1352 panic("nfs sndunlock");
1353 *flagp &= ~NFSMNT_SNDLOCK;
1354 if (*flagp & NFSMNT_WANTSND) {
1355 *flagp &= ~NFSMNT_WANTSND;
1356 wakeup((caddr_t)flagp);
1357 }
1358 }
1359
1360 nfs_rcvlock(rep)
1361 register struct nfsreq *rep;
1362 {
1363 register int *flagp = &rep->r_nmp->nm_flag;
1364 int slpflag, slptimeo = 0;
1365
1366 if (*flagp & NFSMNT_INT)
1367 slpflag = PCATCH;
1368 else
1369 slpflag = 0;
1370 while (*flagp & NFSMNT_RCVLOCK) {
1371 if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
1372 return (EINTR);
1373 *flagp |= NFSMNT_WANTRCV;
1374 (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
1375 slptimeo);
1376 if (slpflag == PCATCH) {
1377 slpflag = 0;
1378 slptimeo = 2 * hz;
1379 }
1380 }
1381 *flagp |= NFSMNT_RCVLOCK;
1382 return (0);
1383 }
1384
1385 /*
1386 * Unlock the stream socket for others.
1387 */
1388 void
1389 nfs_rcvunlock(flagp)
1390 register int *flagp;
1391 {
1392
1393 if ((*flagp & NFSMNT_RCVLOCK) == 0)
1394 panic("nfs rcvunlock");
1395 *flagp &= ~NFSMNT_RCVLOCK;
1396 if (*flagp & NFSMNT_WANTRCV) {
1397 *flagp &= ~NFSMNT_WANTRCV;
1398 wakeup((caddr_t)flagp);
1399 }
1400 }
1401
1402 /*
1403 * Check for badly aligned mbuf data areas and
1404 * realign data in an mbuf list by copying the data areas up, as required.
1405 */
void
nfs_realign(m, hsiz)
	register struct mbuf *m;	/* chain to realign in place */
	int hsiz;			/* size of the invariant RPC header part */
{
	register struct mbuf *m2;
	register int siz, mlen, olen;
	register caddr_t tcp, fcp;	/* "to" and "from" copy pointers */
	struct mbuf *mnew;		/* mbuf currently being filled */

	while (m) {
		/*
		 * This never happens for UDP, rarely happens for TCP
		 * but frequently happens for iso transport.
		 */
		if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
			olen = m->m_len;
			fcp = mtod(m, caddr_t);
			if ((int)fcp & 0x3) {
				/*
				 * Data pointer is misaligned: reset m_data to
				 * an aligned spot in the mbuf's own storage.
				 * The pkthdr would be clobbered, so drop it.
				 */
				m->m_flags &= ~M_PKTHDR;
				if (m->m_flags & M_EXT)
					/* place olen bytes at an aligned offset
					 * near the end of the cluster */
					m->m_data = m->m_ext.ext_buf +
						((m->m_ext.ext_size - olen) & ~0x3);
				else
					m->m_data = m->m_dat;
			}
			m->m_len = 0;
			tcp = mtod(m, caddr_t);
			mnew = m;
			m2 = m->m_next;

			/*
			 * If possible, only put the first invariant part
			 * of the RPC header in the first mbuf.
			 */
			mlen = M_TRAILINGSPACE(m);
			if (olen <= hsiz && mlen > hsiz)
				mlen = hsiz;

			/*
			 * Loop through the mbuf list consolidating data.
			 * fcp/olen track the source, tcp/mlen the destination.
			 */
			while (m) {
				while (olen > 0) {
					if (mlen == 0) {
						/* destination full: start
						 * filling the next mbuf */
						m2->m_flags &= ~M_PKTHDR;
						if (m2->m_flags & M_EXT)
							m2->m_data = m2->m_ext.ext_buf;
						else
							m2->m_data = m2->m_dat;
						m2->m_len = 0;
						mlen = M_TRAILINGSPACE(m2);
						tcp = mtod(m2, caddr_t);
						mnew = m2;
						m2 = m2->m_next;
					}
					siz = min(mlen, olen);
					if (tcp != fcp)
						bcopy(fcp, tcp, siz);
					mnew->m_len += siz;
					mlen -= siz;
					olen -= siz;
					tcp += siz;
					fcp += siz;
				}
				/* advance the source to the next mbuf */
				m = m->m_next;
				if (m) {
					olen = m->m_len;
					fcp = mtod(m, caddr_t);
				}
			}

			/*
			 * Finally, set m_len == 0 for any trailing mbufs that have
			 * been copied out of.
			 */
			while (m2) {
				m2->m_len = 0;
				m2 = m2->m_next;
			}
			return;
		}
		m = m->m_next;
	}
}
1491
1492 /*
1493 * Parse an RPC request
1494 * - verify it
1495 * - fill in the cred struct.
1496 */
nfs_getreq(nd, has_header)
	register struct nfsd *nd;	/* in/out: request state to fill in */
	int has_header;			/* TRUE if xid + rpc_call words present */
{
	register int len, i;
	register u_long *tl;
	register long t1;		/* scratch used by the nfsm_* macros */
	struct uio uio;
	struct iovec iov;
	caddr_t dpos, cp2;		/* cp2 is scratch for nfsm_dissect */
	u_long nfsvers, auth_type;
	int error = 0, nqnfs = 0;	/* nqnfs != 0: request is for NQNFS */
	struct mbuf *mrep, *md;

	/*
	 * NOTE: the nfsm_dissect/nfsm_adv/nfsm_mtouio macros below bail to
	 * the "nfsmout" label with error set if the mbuf chain runs short.
	 */
	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else {
		nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
	}
	nd->nd_repstat = 0;
	/*
	 * For protocol-level mismatches we return 0 with nd_repstat set and
	 * nd_procnum = NFSPROC_NOOP, so the caller sends an error reply
	 * rather than dropping the request.
	 */
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsvers = nfs_vers;
	if (*tl != nfs_prog) {
		if (*tl == nqnfs_prog) {
			nqnfs++;
			nfsvers = nqnfs_vers;
		} else {
			nd->nd_repstat = EPROGUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	}
	tl++;
	if (*tl++ != nfsvers) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	/* NQNFS-only procedures are rejected on plain NFS requests. */
	if (nd->nd_procnum >= NFS_NPROCS ||
		(!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
		(*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);	/* credential body length */
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		/* skip the stamp word, then the machine-name string */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);	/* count of auxiliary gids */
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
		/*
		 * Fill cr_groups starting at index 1; extra gids beyond
		 * NGROUPS are skipped.  (cr_groups[0] is left alone here --
		 * presumably set elsewhere from cr_gid; verify against the
		 * credential handling in the callers.)
		 */
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
	} else if (auth_type == rpc_auth_kerb) {
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_authlen = fxdr_unsigned(int, *tl);
		uio.uio_resid = nfsm_rndup(nd->nd_authlen);
		/* ticket must fit inside the declared credential length */
		if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
			m_freem(mrep);
			return (EBADRPC);
		}
		uio.uio_offset = 0;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		iov.iov_base = (caddr_t)nd->nd_authstr;
		iov.iov_len = RPCAUTH_MAXSIZ;
		nfsm_mtouio(&uio, uio.uio_resid);
		nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
		/* defer to an nfsd that can do the kerberos verification */
		nd->nd_flag |= NFSD_NEEDAUTH;
	}

	/*
	 * Do we have any use for the verifier.
	 * According to the "Remote Procedure Call Protocol Spec." it
	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
	 * For now, just skip over it
	 */
	len = fxdr_unsigned(int, *++tl);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}
	if (len > 0) {
		nfsm_adv(nfsm_rndup(len));
	}

	/*
	 * For nqnfs, get piggybacked lease request.
	 */
	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		nd->nd_nqlflag = fxdr_unsigned(int, *tl);
		if (nd->nd_nqlflag) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			nd->nd_duration = fxdr_unsigned(int, *tl);
		} else
			nd->nd_duration = NQ_MINLEASE;
	} else {
		nd->nd_nqlflag = NQL_NOVAL;
		nd->nd_duration = NQ_MINLEASE;
	}
	/* hand the parse position back for the procedure-specific args */
	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}
1644
1645 nfs_msg(p, server, msg)
1646 struct proc *p;
1647 char *server, *msg;
1648 {
1649 tpr_t tpr;
1650
1651 if (p)
1652 tpr = tprintf_open(p);
1653 else
1654 tpr = NULL;
1655 tprintf(tpr, "nfs server %s: %s\n", server, msg);
1656 tprintf_close(tpr);
1657 }
1658
1659 #ifdef NFSSERVER
/* Forward declarations of the server procedure handlers. */
int nfsrv_null(),
	nfsrv_getattr(),
	nfsrv_setattr(),
	nfsrv_lookup(),
	nfsrv_readlink(),
	nfsrv_read(),
	nfsrv_write(),
	nfsrv_create(),
	nfsrv_remove(),
	nfsrv_rename(),
	nfsrv_link(),
	nfsrv_symlink(),
	nfsrv_mkdir(),
	nfsrv_rmdir(),
	nfsrv_readdir(),
	nfsrv_statfs(),
	nfsrv_noop(),
	nqnfsrv_readdirlook(),
	nqnfsrv_getlease(),
	nqnfsrv_vacated(),
	nqnfsrv_access();

/*
 * Server dispatch table, indexed by RPC procedure number.
 * Entries 0-17 follow the NFS version 2 procedure numbering (RFC 1094);
 * ROOT (3) and WRITECACHE (7) are obsolete and mapped to nfsrv_noop.
 * Entries 18-22 are the NQNFS extensions.
 */
int (*nfsrv_procs[NFS_NPROCS])() = {
	nfsrv_null,		/*  0 NULL */
	nfsrv_getattr,		/*  1 GETATTR */
	nfsrv_setattr,		/*  2 SETATTR */
	nfsrv_noop,		/*  3 ROOT (obsolete) */
	nfsrv_lookup,		/*  4 LOOKUP */
	nfsrv_readlink,		/*  5 READLINK */
	nfsrv_read,		/*  6 READ */
	nfsrv_noop,		/*  7 WRITECACHE (obsolete) */
	nfsrv_write,		/*  8 WRITE */
	nfsrv_create,		/*  9 CREATE */
	nfsrv_remove,		/* 10 REMOVE */
	nfsrv_rename,		/* 11 RENAME */
	nfsrv_link,		/* 12 LINK */
	nfsrv_symlink,		/* 13 SYMLINK */
	nfsrv_mkdir,		/* 14 MKDIR */
	nfsrv_rmdir,		/* 15 RMDIR */
	nfsrv_readdir,		/* 16 READDIR */
	nfsrv_statfs,		/* 17 STATFS */
	nqnfsrv_readdirlook,	/* 18 NQNFS readdir-with-lookup */
	nqnfsrv_getlease,	/* 19 NQNFS get lease */
	nqnfsrv_vacated,	/* 20 NQNFS vacated */
	nfsrv_noop,		/* 21 NOTE(review): presumably NQNFS evicted,
				 *    handled specially (see nfs_getreq) */
	nqnfsrv_access,		/* 22 NQNFS access check */
};
1707
1708 /*
1709 * Socket upcall routine for the nfsd sockets.
1710 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
1711 * Essentially do as much as possible non-blocking, else punt and it will
1712 * be called with M_WAIT from an nfsd.
1713 */
void
nfsrv_rcv(so, arg, waitflag)
	struct socket *so;
	caddr_t arg;	/* really a struct nfssvc_sock * */
	int waitflag;	/* M_DONTWAIT from the upcall, M_WAIT from an nfsd */
{
	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	register struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
	}
#endif
	auio.uio_procp = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().  The huge uio_resid just means
		 * "take everything available".
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
		if (error || mp == (struct mbuf *)0) {
			/* EWOULDBLOCK: retry later; anything else: drop */
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		/* Append the new data to the raw (unparsed) stream buffer. */
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		if (error = nfsrv_getstream(slp, waitflag)) {
			/* EPERM means the record mark was garbage */
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		/*
		 * Datagram socket: each soreceive() returns one complete
		 * record; queue it (with its source address mbuf, if any)
		 * directly on the record list.
		 */
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = soreceive(so, &nam, &auio, &mp,
						(struct mbuf **)0, &flags);
			if (mp) {
				nfs_realign(mp, 10 * NFSX_UNSIGNED);
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = (struct mbuf *)0;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
					&& error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}
1820
1821 /*
1822 * Try and extract an RPC request from the mbuf data list received on a
1823 * stream socket. The "waitflag" argument indicates whether or not it
1824 * can sleep.
1825 */
nfsrv_getstream(slp, waitflag)
	register struct nfssvc_sock *slp;
	int waitflag;	/* M_WAIT or M_DONTWAIT, passed to m_copym() */
{
	register struct mbuf *m;
	register char *cp1, *cp2;
	register int len;
	struct mbuf *om, *m2, *recm;
	u_long recmark;

	/* Not reentrant: SLP_GETSTREAM serializes parsers on this socket. */
	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
	    if (slp->ns_reclen == 0) {
		/*
		 * ns_reclen == 0: we are at a record boundary and must
		 * first read the 4-byte RPC record mark.
		 */
		if (slp->ns_cc < NFSX_UNSIGNED) {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}
		m = slp->ns_raw;
		if (m->m_len >= NFSX_UNSIGNED) {
			bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
			m->m_data += NFSX_UNSIGNED;
			m->m_len -= NFSX_UNSIGNED;
		} else {
			/* record mark straddles mbufs: gather it bytewise */
			cp1 = (caddr_t)&recmark;
			cp2 = mtod(m, caddr_t);
			while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
				while (m->m_len == 0) {
					m = m->m_next;
					cp2 = mtod(m, caddr_t);
				}
				*cp1++ = *cp2++;
				m->m_data++;
				m->m_len--;
			}
		}
		slp->ns_cc -= NFSX_UNSIGNED;
		/* strip the "last fragment" bit; sanity-check the length */
		slp->ns_reclen = ntohl(recmark) & ~0x80000000;
		if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (EPERM);
		}
	    }

	    /*
	     * Now get the record part.
	     */
	    if (slp->ns_cc == slp->ns_reclen) {
		/* exact fit: the whole raw chain is one record */
		recm = slp->ns_raw;
		slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
		slp->ns_cc = slp->ns_reclen = 0;
	    } else if (slp->ns_cc > slp->ns_reclen) {
		/*
		 * More data buffered than one record: split the raw chain
		 * at the record boundary.  om trails m as the last mbuf
		 * wholly inside the record; recm ends up as the record head.
		 */
		len = 0;
		m = slp->ns_raw;
		om = (struct mbuf *)0;
		while (len < slp->ns_reclen) {
			if ((len + m->m_len) > slp->ns_reclen) {
				/* boundary falls inside m: copy the head
				 * part out and advance m past it */
				m2 = m_copym(m, 0, slp->ns_reclen - len,
					waitflag);
				if (m2) {
					if (om) {
						om->m_next = m2;
						recm = slp->ns_raw;
					} else
						recm = m2;
					m->m_data += slp->ns_reclen - len;
					m->m_len -= slp->ns_reclen - len;
					len = slp->ns_reclen;
				} else {
					/* m_copym failed (M_DONTWAIT) */
					slp->ns_flag &= ~SLP_GETSTREAM;
					return (EWOULDBLOCK);
				}
			} else if ((len + m->m_len) == slp->ns_reclen) {
				/* boundary at the end of m: just unlink */
				om = m;
				len += m->m_len;
				m = m->m_next;
				recm = slp->ns_raw;
				om->m_next = (struct mbuf *)0;
			} else {
				om = m;
				len += m->m_len;
				m = m->m_next;
			}
		}
		slp->ns_raw = m;
		slp->ns_cc -= len;
		slp->ns_reclen = 0;
	    } else {
		/* record incomplete; wait for more stream data */
		slp->ns_flag &= ~SLP_GETSTREAM;
		return (0);
	    }
	    /* queue the completed record for the nfsds */
	    nfs_realign(recm, 10 * NFSX_UNSIGNED);
	    if (slp->ns_recend)
		slp->ns_recend->m_nextpkt = recm;
	    else
		slp->ns_rec = recm;
	    slp->ns_recend = recm;
	}
}
1926
1927 /*
1928 * Parse an RPC header.
1929 */
1930 nfsrv_dorec(slp, nd)
1931 register struct nfssvc_sock *slp;
1932 register struct nfsd *nd;
1933 {
1934 register struct mbuf *m;
1935 int error;
1936
1937 if ((slp->ns_flag & SLP_VALID) == 0 ||
1938 (m = slp->ns_rec) == (struct mbuf *)0)
1939 return (ENOBUFS);
1940 if (slp->ns_rec = m->m_nextpkt)
1941 m->m_nextpkt = (struct mbuf *)0;
1942 else
1943 slp->ns_recend = (struct mbuf *)0;
1944 if (m->m_type == MT_SONAME) {
1945 nd->nd_nam = m;
1946 nd->nd_md = nd->nd_mrep = m->m_next;
1947 m->m_next = (struct mbuf *)0;
1948 } else {
1949 nd->nd_nam = (struct mbuf *)0;
1950 nd->nd_md = nd->nd_mrep = m;
1951 }
1952 nd->nd_dpos = mtod(nd->nd_md, caddr_t);
1953 if (error = nfs_getreq(nd, TRUE)) {
1954 m_freem(nd->nd_nam);
1955 return (error);
1956 }
1957 return (0);
1958 }
1959
1960 /*
1961 * Search for a sleeping nfsd and wake it up.
1962 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
1963 * running nfsds will go look for the work in the nfssvc_sock list.
1964 */
1965 void
1966 nfsrv_wakenfsd(slp)
1967 struct nfssvc_sock *slp;
1968 {
1969 register struct nfsd *nd = nfsd_head.nd_next;
1970
1971 if ((slp->ns_flag & SLP_VALID) == 0)
1972 return;
1973 while (nd != (struct nfsd *)&nfsd_head) {
1974 if (nd->nd_flag & NFSD_WAITING) {
1975 nd->nd_flag &= ~NFSD_WAITING;
1976 if (nd->nd_slp)
1977 panic("nfsd wakeup");
1978 slp->ns_sref++;
1979 nd->nd_slp = slp;
1980 wakeup((caddr_t)nd);
1981 return;
1982 }
1983 nd = nd->nd_next;
1984 }
1985 slp->ns_flag |= SLP_DOREC;
1986 nfsd_head.nd_flag |= NFSD_CHECKSLP;
1987 }
1988 #endif /* NFSSERVER */
1989