/*	$NetBSD: nfs_socket.c,v 1.55 2000/03/30 12:51:15 augustss Exp $	*/

/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 */

/*
 * Socket operations for use by nfs
 */

#include "fs_nfs.h"
#include "opt_nfsserver.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/namei.h>
#include <sys/signal.h>
#include <sys/signalvar.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfsrtt.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>

#define	TRUE	1
#define	FALSE	0

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
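/*
 * Worked example of the fixed-point scaling (hypothetical numbers, for
 * illustration only): with a smoothed rtt A of 2 ticks and a smoothed
 * deviation D of 1 tick, nm_srtt holds A << 3 == 16 and nm_sdrtt holds
 * D << 2 == 4.  For a getattr (timer 1) NFS_RTO yields
 * (((16 + 3) >> 2) + 4 + 1) >> 1 == 4 ticks, i.e. about A+2D; for a
 * read (timer 3) it yields ((16 + 7) >> 3) + 4 + 1 == 7 ticks, about
 * A+4D plus rounding.
 */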
/*
 * External data, mostly RPC constants in XDR form
 */
extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
	rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
	rpc_auth_kerb;
extern u_int32_t nfs_prog, nqnfs_prog;
extern time_t nqnfsstarttime;
extern struct nfsstats nfsstats;
extern int nfsv3_procid[NFS_NPROCS];
extern int nfs_ticks;

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
	0, 0, 0,
};
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
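/*
 * Scaling example (hypothetical numbers, for illustration): with
 * nm_cwnd at 8 * NFS_CWNDSCALE == 2048, the additive increase applied
 * in nfs_reply() is (NFS_CWNDSCALE * NFS_CWNDSCALE + 1024) / 2048 == 32,
 * i.e. 1/8 of an rpc slot per reply, so a full window of 8 replies
 * opens the window by roughly one rpc, as in Jacobson's congestion
 * avoidance phase.
 */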
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
int nfsrtton = 0;
struct nfsrtt nfsrtt;

struct callout nfs_timer_ch = CALLOUT_INITIALIZER;

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr on error.
 */
int
nfs_connect(nmp, rep)
	struct nfsmount *nmp;
	struct nfsreq *rep;
{
	struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct mbuf *m;
	u_int16_t tport;

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
		nmp->nm_soproto);
	if (error)
		goto bad;
	so = nmp->nm_so;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		MGET(m, M_WAIT, MT_SONAME);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin->sin_port = htons(tport);
		while ((error = sobind(so, m)) == EADDRINUSE &&
		       --tport > IPPORT_RESERVED / 2)
			sin->sin_port = htons(tport);
		m_freem(m);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, nmp->nm_nam);
		if (error)
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splsoftnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
			    "nfscn1", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
		    NFS_MAXPKTHDR) * 2;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
		    NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int32_t *) = 1;
			m->m_len = sizeof(int32_t);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int32_t *) = 1;
			m->m_len = sizeof(int32_t);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
		    sizeof (u_int32_t)) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
		    sizeof (u_int32_t)) * 2;
	}
	error = soreserve(so, sndreserve, rcvreserve);
	if (error)
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
	    nmp->nm_srtt[4] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
	    nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 */
int
nfs_reconnect(rep)
	struct nfsreq *rep;
{
	struct nfsreq *rp;
	struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	while ((error = nfs_connect(nmp, rep)) != 0) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscn2", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
	}
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(nmp)
	struct nfsmount *nmp;
{
	struct socket *so;
	int drain = 0;

	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = (struct socket *)0;
		soshutdown(so, 2);
		drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0;
		if (drain) {
			/*
			 * soshutdown() above should wake up the current
			 * listener.
			 * Now wake up those waiting for the receive lock, and
			 * wait for them to go away unhappy, to prevent *nmp
			 * from evaporating while they're sleeping.
			 */
			while (nmp->nm_waiters > 0) {
				wakeup (&nmp->nm_iflag);
				sleep(&nmp->nm_waiters, PVFS);
			}
		}
		soclose(so);
	}
#ifdef DIAGNOSTIC
	if (drain && (nmp->nm_waiters > 0))
		panic("nfs_disconnect: waiters left after drain?\n");
#endif
}

void
nfs_safedisconnect(nmp)
	struct nfsmount *nmp;
{
	struct nfsreq dummyreq;

	memset(&dummyreq, 0, sizeof(dummyreq));
	dummyreq.r_nmp = nmp;
	nfs_rcvlock(&dummyreq); /* XXX ignored error return */
	nfs_disconnect(nmp);
	nfs_rcvunlock(&nmp->nm_iflag);
}

/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_sndlock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 * For the client side:
 * - return EINTR if the RPC is terminated, 0 otherwise
 * - set R_MUSTRESEND if the send fails for any reason
 * - do any cleanup required by recoverable socket errors (???)
 * For the server side:
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfs_send(so, nam, top, rep)
	struct socket *so;
	struct mbuf *nam;
	struct mbuf *top;
	struct nfsreq *rep;
{
	struct mbuf *sendnam;
	int error, soflags, flags;

	if (rep) {
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		if ((so = rep->r_nmp->nm_so) == NULL) {
			rep->r_flags |= R_MUSTRESEND;
			m_freem(top);
			return (0);
		}
		rep->r_flags &= ~R_MUSTRESEND;
		soflags = rep->r_nmp->nm_soflags;
	} else
		soflags = so->so_proto->pr_flags;
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = (struct mbuf *)0;
	else
		sendnam = nam;
	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

	error = (*so->so_send)(so, sendnam, (struct uio *)0, top,
		(struct mbuf *)0, flags);
	if (error) {
		if (rep) {
			log(LOG_INFO, "nfs send error %d for server %s\n",error,
			    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			/*
			 * Deal with errors for the client side.
			 */
			if (rep->r_flags & R_SOFTTERM)
				error = EINTR;
			else
				rep->r_flags |= R_MUSTRESEND;
		} else
			log(LOG_INFO, "nfsd send error %d\n", error);

		/*
		 * Handle any recoverable (soft) socket errors here. (???)
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}
	return (error);
}

#ifdef NFS
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 * small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 */
int
nfs_receive(rep, aname, mp)
	struct nfsreq *rep;
	struct mbuf **aname;
	struct mbuf **mp;
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(&rep->r_nmp->nm_iflag, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(&rep->r_nmp->nm_iflag);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(&rep->r_nmp->nm_iflag);
				return (error);
			}
			goto tryagain;
		}
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep)) != 0) {
					nfs_sndunlock(&rep->r_nmp->nm_iflag);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(&rep->r_nmp->nm_iflag);
		if (sotype == SOCK_STREAM) {
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			auio.uio_procp = p;
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, (struct mbuf **)0, &auio,
				    (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				/*
				 * Don't log a 0 byte receive; it means
				 * that the socket has been closed, and
				 * can happen during normal operation
				 * (forcible unmount or Solaris server).
				 */
				if (auio.uio_resid != sizeof (u_int32_t))
					log(LOG_INFO,
					    "short receive (%lu/%lu) from nfs server %s\n",
					    (u_long)sizeof(u_int32_t) - auio.uio_resid,
					    (u_long)sizeof(u_int32_t),
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
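			/*
			 * The 4 bytes just read are the Sun RPC record
			 * mark (RFC 1831): the high bit flags the last
			 * fragment of a record and the low 31 bits give
			 * the fragment length, hence the mask below.
			 */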
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, (struct mbuf **)0,
				    &auio, mp, (struct mbuf **)0, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				if (len != auio.uio_resid)
					log(LOG_INFO,
					    "short receive (%lu/%d) from nfs server %s\n",
					    (u_long)len - auio.uio_resid, len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			auio.uio_procp = p;
			do {
				rcvflg = 0;
				error = (*so->so_receive)(so, (struct mbuf **)0,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
				 (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(&rep->r_nmp->nm_iflag, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
			else
				nfs_sndunlock(&rep->r_nmp->nm_iflag);
		}
	} else {
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct mbuf **)0;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		auio.uio_procp = p;
		do {
			rcvflg = 0;
			error = (*so->so_receive)(so, getnam, &auio, mp,
			    (struct mbuf **)0, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
		if (!error && *mp == NULL)
			error = EPIPE;
	}
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
int
nfs_reply(myrep)
	struct nfsreq *myrep;
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	caddr_t dpos, cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(myrep);
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket
		 */
		nmp->nm_waiters++;
		error = nfs_receive(myrep, &nam, &mrep);
		nfs_rcvunlock(&nmp->nm_iflag);
		if (error) {

			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				nmp->nm_waiters--;
				wakeup (&nmp->nm_waiters);
				return error;
			}
			nmp->nm_waiters--;
			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				printf("nfs_reply: ignoring error %d\n", error);
#endif
				if (myrep->r_flags & R_GETONEREP)
					return (0);
				continue;
			}
			return (error);
		}
		nmp->nm_waiters--;
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, caddr_t);
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			if (nmp->nm_flag & NFSMNT_NQNFS) {
				if (nqnfs_callback(nmp, mrep, md, dpos))
					nfsstats.rpcinvalid++;
			} else {
				nfsstats.rpcinvalid++;
				m_freem(mrep);
			}
nfsmout:
			if (myrep->r_flags & R_GETONEREP)
				return (0);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		for (rep = nfs_reqq.tqh_first; rep != 0;
		    rep = rep->r_chain.tqe_next) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
					rt->tstamp = time;
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					    (NFS_CWNDSCALE * NFS_CWNDSCALE +
					    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
				nmp->nm_timeouts = 0;
				break;
			}
		}
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
		if (myrep->r_flags & R_GETONEREP)
			return (0);
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
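/*
 * A typical call (sketch only; the real callers in nfs_vnops.c go
 * through the nfsm_* macros in nfsm_subs.h, which hide the mbuf
 * handling; "mreq" here is a hypothetical request mbuf chain):
 *
 *	error = nfs_request(vp, mreq, NFSPROC_GETATTR, procp, cred,
 *	    &mrep, &md, &dpos);
 *
 * On success the reply is then parsed out of mrep/md/dpos and the
 * caller must m_freem(mrep) when done with it.
 */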
int
nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
	struct vnode *vp;
	struct mbuf *mrest;
	int procnum;
	struct proc *procp;
	struct ucred *cred;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp;
	struct mbuf *md, *mheadend;
	struct nfsnode *np;
	char nickv[RPCX_NICKVERF];
	time_t reqtime, waituntil;
	caddr_t dpos, cp2;
	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	u_quad_t frev;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */

	nmp = VFSTONFS(vp->v_mount);
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((caddr_t)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			if (error) {
				free((caddr_t)rep, M_NFSREQ);
				m_freem(mrest);
				return (error);
			}
		}
	} else {
		auth_type = RPCAUTH_UNIX;
		auth_len = (((cred->cr_ngroups > nmp->nm_numgrps) ?
			nmp->nm_numgrps : cred->cr_ngroups) << 2) +
			5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
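	/*
	 * The mark's high bit says "last fragment" and its low 31 bits
	 * carry the record length, e.g. a 120 byte request would get
	 * the mark 0x80000078 (hypothetical length, for illustration).
	 */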
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
			 (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);

	/* Get send time for nqnfs */
	reqtime = time.tv_sec;

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(&nmp->nm_iflag, rep);
		if (!error) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(&nmp->nm_iflag);
		}
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			nmp->nm_sent += NFS_CWNDSCALE;
			rep->r_flags |= R_SENT;
		}
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
	splx(s);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}

	/*
	 * If there was a successful reply and a tprintf msg was printed,
	 * tprintf a response saying the server is alive again.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error) {
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				failed_auth++;
				mheadend->m_next = (struct mbuf *)0;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
				error == NFSERR_TRYLATER) {
				m_freem(mrep);
				error = 0;
				waituntil = time.tv_sec + trylater_delay;
				while (time.tv_sec < waituntil)
					(void) tsleep((caddr_t)&lbolt,
						PSOCK, "nqnfstry", 0);
				trylater_delay *= nfs_backoff[trylater_cnt];
				if (trylater_cnt < 7)
					trylater_cnt++;
				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if (error == ESTALE)
				cache_purge(vp);
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			m_freem(rep->r_mreq);
			free((caddr_t)rep, M_NFSREQ);
			return (error);
		}

		/*
		 * For nqnfs, get any lease in reply
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
			if (*tl) {
				np = VTONFS(vp);
				nqlflag = fxdr_unsigned(int, *tl);
				nfsm_dissect(tl, u_int32_t *, 4*NFSX_UNSIGNED);
				cachable = fxdr_unsigned(int, *tl++);
				reqtime += fxdr_unsigned(int, *tl++);
				if (reqtime > time.tv_sec) {
					frev = fxdr_hyper(tl);
					nqnfs_clientlease(nmp, np, nqlflag,
						cachable, reqtime, frev);
				}
			}
		}
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		m_freem(rep->r_mreq);
		FREE((caddr_t)rep, M_NFSREQ);
		return (0);
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	m_freem(rep->r_mreq);
	free((caddr_t)rep, M_NFSREQ);
	return (error);
}
#endif /* NFS */

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
	int siz;
	struct nfsrv_descript *nd;
	struct nfssvc_sock *slp;
	int err;
	int cache;
	u_quad_t *frev;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	u_int32_t *tl;
	struct mbuf *mreq;
	caddr_t bpos;
	struct mbuf *mb, *mb2;

	MGETHDR(mreq, M_WAIT, MT_DATA);
	mb = mreq;
	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= max_datalen) {
		MCLGET(mreq, M_WAIT);
	} else
		mreq->m_data += max_hdr;
	tl = mtod(mreq, u_int32_t *);
	mreq->m_len = 6 * NFSX_UNSIGNED;
	bpos = ((caddr_t)tl) + mreq->m_len;
	*tl++ = txdr_unsigned(nd->nd_retxid);
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		*tl++ = rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			mreq->m_len -= NFSX_UNSIGNED;
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = rpc_msgaccepted;

		/*
		 * For Kerberos authentication, we must send the nickname
		 * verifier back, otherwise just RPCAUTH_NULL.
		 */
		if (nd->nd_flag & ND_KERBFULL) {
			struct nfsuid *nuidp;
			struct timeval ktvin, ktvout;

			for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
				    (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
				    &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (nuidp) {
				ktvin.tv_sec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
				ktvin.tv_usec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_usec);

				/*
				 * Encrypt the timestamp in ecb mode using the
				 * session key.
				 */
#ifdef NFSKERB
				XXX
#endif

				*tl++ = rpc_auth_kerb;
				*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
				*tl = ktvout.tv_sec;
				nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = ktvout.tv_usec;
				*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
			} else {
				*tl++ = 0;
				*tl++ = 0;
			}
		} else {
			*tl++ = 0;
			*tl++ = 0;
		}
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (nd->nd_flag & ND_NQNFS) {
				*tl++ = txdr_unsigned(3);
				*tl = txdr_unsigned(3);
			} else {
				*tl++ = txdr_unsigned(2);
				*tl = txdr_unsigned(3);
			}
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;
			if (err != NFSERR_RETVOID) {
				nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
				if (err)
					*tl = txdr_unsigned(nfsrv_errmap(nd, err));
				else
					*tl = 0;
			}
			break;
		}
	}

	/*
	 * For nqnfs, piggyback lease as requested.
	 */
	if ((nd->nd_flag & ND_NQNFS) && err == 0) {
		if (nd->nd_flag & ND_LEASE) {
			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
			*tl++ = txdr_unsigned(cache);
			*tl++ = txdr_unsigned(nd->nd_duration);
			txdr_hyper(*frev, tl);
		} else {
			nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = 0;
		}
	}
	if (mrq != NULL)
		*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID)
		nfsstats.srvrpc_errs++;
	return (0);
}

/*
 * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
 */
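/*
 * Backoff example (hypothetical numbers): if NFS_RTO yields 4 ticks
 * and three timeouts have already occurred on the mount, the effective
 * timeout below is 4 * nfs_backoff[2] == 32 ticks, doubling with each
 * further timeout until nm_timeouts caps at 8 (nfs_backoff[7] == 256).
 */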
void
nfs_timer(arg)
	void *arg;	/* never used */
{
	struct nfsreq *rep;
	struct mbuf *m;
	struct socket *so;
	struct nfsmount *nmp;
	int timeo;
	int s, error;
#ifdef NFSSERVER
	struct nfssvc_sock *slp;
	static long lasttime = 0;
	u_quad_t cur_usec;
#endif

	s = splsoftnet();
	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
		nmp = rep->r_nmp;
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_rtt >= 0) {
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
		}
		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		     rep->r_rexmit > nmp->nm_deadthresh) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows, resend it.
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			if (so->so_state & SS_ISCONNECTED)
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
				    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
			else
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
				    nmp->nm_nam, (struct mbuf *)0, (struct proc *)0);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
#ifdef DEBUG
					printf("nfs_timer: ignoring error %d\n",
					    error);
#endif
					so->so_error = 0;
				}
			} else {
				/*
				 * Iff first send, start timing
				 * else turn timing off, backoff timer
				 * and divide congestion window by 2.
				 */
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_TIMING;
					if (++rep->r_rexmit > NFS_MAXREXMIT)
						rep->r_rexmit = NFS_MAXREXMIT;
					nmp->nm_cwnd >>= 1;
					if (nmp->nm_cwnd < NFS_CWNDSCALE)
						nmp->nm_cwnd = NFS_CWNDSCALE;
					nfsstats.rpcretries++;
				} else {
					rep->r_flags |= R_SENT;
					nmp->nm_sent += NFS_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}

#ifdef NFSSERVER
	/*
	 * Call the nqnfs server timer once a second to handle leases.
	 */
	if (lasttime != time.tv_sec) {
		lasttime = time.tv_sec;
		nqnfs_serverd();
	}

	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
	for (slp = nfssvc_sockhead.tqh_first; slp != 0;
	    slp = slp->ns_chain.tqe_next) {
		if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time <= cur_usec)
			nfsrv_wakenfsd(slp);
	}
#endif /* NFSSERVER */
	splx(s);
	callout_reset(&nfs_timer_ch, nfs_ticks, nfs_timer, NULL);
}

/*
 * Test for a termination condition pending on the process.
 * This is used for NFSMNT_INT mounts.
 */
int
nfs_sigintr(nmp, rep, p)
	struct nfsmount *nmp;
	struct nfsreq *rep;
	struct proc *p;
{
	sigset_t ss;

	if (rep && (rep->r_flags & R_SOFTTERM))
		return (EINTR);
	if (!(nmp->nm_flag & NFSMNT_INT))
		return (0);
	if (p) {
		sigpending1(p, &ss);
#if 0
		sigminusset(&p->p_sigignore, &ss);
#endif
		if (sigismember(&ss, SIGINT) || sigismember(&ss, SIGTERM) ||
		    sigismember(&ss, SIGKILL) || sigismember(&ss, SIGHUP) ||
		    sigismember(&ss, SIGQUIT))
			return (EINTR);
	}
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
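/*
 * The lock is a flag bit in *flagp (nm_iflag): NFSMNT_SNDLOCK marks it
 * held and NFSMNT_WANTSND marks a waiter, so nfs_sndunlock() knows when
 * a wakeup() is needed.  A fatal signal aborts the wait via
 * nfs_sigintr(); after one PCATCH sleep the code falls back to a timed
 * 2 second sleep so a pending non-fatal signal cannot spin here.
 */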
int
nfs_sndlock(flagp, rep)
	int *flagp;
	struct nfsreq *rep;
{
	struct proc *p;
	int slpflag = 0, slptimeo = 0;

	if (rep) {
		p = rep->r_procp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			slpflag = PCATCH;
	} else
		p = (struct proc *)0;
	while (*flagp & NFSMNT_SNDLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, p))
			return (EINTR);
		*flagp |= NFSMNT_WANTSND;
		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
			slptimeo);
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	*flagp |= NFSMNT_SNDLOCK;
	return (0);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(flagp)
	int *flagp;
{

	if ((*flagp & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*flagp &= ~NFSMNT_SNDLOCK;
	if (*flagp & NFSMNT_WANTSND) {
		*flagp &= ~NFSMNT_WANTSND;
		wakeup((caddr_t)flagp);
	}
}

int
nfs_rcvlock(rep)
	struct nfsreq *rep;
{
	struct nfsmount *nmp = rep->r_nmp;
	int *flagp = &nmp->nm_iflag;
	int slpflag, slptimeo = 0;

	if (*flagp & NFSMNT_DISMNT)
		return EIO;

	if (*flagp & NFSMNT_INT)
		slpflag = PCATCH;
	else
		slpflag = 0;
	while (*flagp & NFSMNT_RCVLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
			return (EINTR);
		*flagp |= NFSMNT_WANTRCV;
		nmp->nm_waiters++;
		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
			slptimeo);
		nmp->nm_waiters--;
		if (*flagp & NFSMNT_DISMNT) {
			wakeup(&nmp->nm_waiters);
			return EIO;
		}
		/*
		 * If our reply was received while we were sleeping,
		 * then just return without taking the lock to avoid a
		 * situation where a single iod could 'capture' the
		 * receive lock.
		 */
		if (rep->r_mrep != NULL)
			return (EALREADY);
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	*flagp |= NFSMNT_RCVLOCK;
	return (0);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_rcvunlock(flagp)
	int *flagp;
{

	if ((*flagp & NFSMNT_RCVLOCK) == 0)
		panic("nfs rcvunlock");
	*flagp &= ~NFSMNT_RCVLOCK;
	if (*flagp & NFSMNT_WANTRCV) {
		*flagp &= ~NFSMNT_WANTRCV;
		wakeup((caddr_t)flagp);
	}
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
int
nfs_getreq(nd, nfsd, has_header)
	struct nfsrv_descript *nd;
	struct nfsd *nfsd;
	int has_header;
{
	int len, i;
	u_int32_t *tl;
	int32_t t1;
	struct uio uio;
	struct iovec iov;
	caddr_t dpos, cp2, cp;
	u_int32_t nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, nqnfs = 0, ticklen;
	struct mbuf *mrep, *md;
	struct nfsuid *nuidp;
	struct timeval tvin, tvout;

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (*tl != nfs_prog) {
		if (*tl == nqnfs_prog)
			nqnfs++;
		else {
			nd->nd_repstat = EPROGUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	}
	tl++;
	nfsvers = fxdr_unsigned(u_int32_t, *tl++);
	if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
		(nfsvers != NQNFS_VER3 && nqnfs)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nqnfs)
		nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
	else if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if (nd->nd_procnum >= NFS_NPROCS ||
		(!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
		(!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
		memset((caddr_t)&nd->nd_cr, 0, sizeof (struct ucred));
		nd->nd_cr.cr_ref = 1;
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
		for (i = 0; i < len; i++)
			if (i < NGROUPS)
				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		nd->nd_cr.cr_ngroups = (len > NGROUPS) ? NGROUPS : len;
		if (nd->nd_cr.cr_ngroups > 1)
			nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(mrep);
			return (EBADRPC);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			ticklen = fxdr_unsigned(int, *tl);
			*((u_int32_t *)nfsd->nfsd_authstr) = *tl;
			uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
			nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
			if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
				m_freem(mrep);
				return (EBADRPC);
			}
			uio.uio_offset = 0;
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_segflg = UIO_SYSSPACE;
			iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
			iov.iov_len = RPCAUTH_MAXSIZ - 4;
			nfsm_mtouio(&uio, uio.uio_resid);
			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				printf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
			tl = (u_int32_t *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				printf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			cp += NFSX_UNSIGNED;
			memcpy(nfsd->nfsd_verfstr, cp, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			if (len != 2 * NFSX_UNSIGNED) {
				printf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				printf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (nuidp->nu_cr.cr_uid == nickuid &&
				    (!nd->nd_nam2 ||
				     netaddr_match(NU_NETFAM(nuidp),
				      &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			/*
			 * Now, decrypt the timestamp using the session key
			 * and validate it.
			 */
#ifdef NFSKERB
			XXX
#endif

			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			if (nuidp->nu_expire < time.tv_sec ||
			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
			     nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				nuidp->nu_expire = 0;
				nd->nd_repstat =
				    (NFSERR_AUTHERR|AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
			nd->nd_flag |= ND_KERBNICK;
		}
	} else {
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	/*
	 * For nqnfs, get piggybacked lease request.
	 */
	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		nd->nd_flag |= fxdr_unsigned(int, *tl);
		if (nd->nd_flag & ND_LEASE) {
			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
			nd->nd_duration = fxdr_unsigned(u_int32_t, *tl);
		} else
			nd->nd_duration = NQ_MINLEASE;
	} else
		nd->nd_duration = NQ_MINLEASE;
	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}

int
nfs_msg(p, server, msg)
	struct proc *p;
	char *server, *msg;
{
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}

#ifdef NFSSERVER
int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *,
				     struct nfssvc_sock *, struct proc *,
				     struct mbuf **)) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nqnfsrv_getlease,
	nqnfsrv_vacated,
	nfsrv_noop,
	nfsrv_noop
};

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with M_WAIT from an nfsd.
 */
void
nfsrv_rcv(so, arg, waitflag)
	struct socket *so;
	caddr_t arg;
	int waitflag;
{
	struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
	}
#endif
	auio.uio_procp = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = (*so->so_receive)(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
		if (error || mp == (struct mbuf *)0) {
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = (*so->so_receive)(so, &nam, &auio, &mp,
						(struct mbuf **)0, &flags);
			if (mp) {
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = (struct mbuf *)0;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
				    && error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
	    (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
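/*
 * In outline (describing the loop below): read the 4 byte record mark
 * into ns_reclen, peel exactly ns_reclen bytes off the raw chain into
 * a fragment, append the fragment to ns_frag, and once a mark with the
 * last-fragment bit set has been seen, move the completed record onto
 * the ns_rec queue for an nfsd to pick up.
 */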
int
nfsrv_getstream(slp, waitflag)
	struct nfssvc_sock *slp;
	int waitflag;
{
	struct mbuf *m, **mpp;
	char *cp1, *cp2;
	int len;
	struct mbuf *om, *m2, *recm = NULL;
	u_int32_t recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			if (m->m_len >= NFSX_UNSIGNED) {
				memcpy((caddr_t)&recmark, mtod(m, caddr_t), NFSX_UNSIGNED);
				m->m_data += NFSX_UNSIGNED;
				m->m_len -= NFSX_UNSIGNED;
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mtod(m, caddr_t);
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (m->m_len == 0) {
						m = m->m_next;
						cp2 = mtod(m, caddr_t);
					}
					*cp1++ = *cp2++;
					m->m_data++;
					m->m_len--;
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 */
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			om = (struct mbuf *)0;
			while (len < slp->ns_reclen) {
				if ((len + m->m_len) > slp->ns_reclen) {
					size_t left = slp->ns_reclen - len;

					MGETHDR(m2, waitflag, m->m_type);
					if (m2 == NULL) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (left > MHLEN) {
						MCLGET(m2, waitflag);
						if (!(m2->m_flags & M_EXT)) {
							m_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
					}
					memcpy(mtod(m2, caddr_t), mtod(m, caddr_t),
					    left);
					m2->m_len = left;
					m->m_data += left;
					m->m_len -= left;
					if (om) {
						om->m_next = m2;
						recm = slp->ns_raw;
					} else
						recm = m2;
					len = slp->ns_reclen;
				} else if ((len + m->m_len) == slp->ns_reclen) {
					om = m;
					len += m->m_len;
					m = m->m_next;
					recm = slp->ns_raw;
					om->m_next = (struct mbuf *)0;
				} else {
					om = m;
					len += m->m_len;
					m = m->m_next;
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		mpp = &slp->ns_frag;
		while (*mpp)
			mpp = &((*mpp)->m_next);
		*mpp = recm;
		if (slp->ns_flag & SLP_LASTFRAG) {
			if (slp->ns_recend)
				slp->ns_recend->m_nextpkt = slp->ns_frag;
			else
				slp->ns_rec = slp->ns_frag;
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = (struct mbuf *)0;
		}
	}
}

/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(slp, nfsd, ndp)
	struct nfssvc_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript **ndp;
{
	struct mbuf *m, *nam;
	struct nfsrv_descript *nd;
	int error;

	*ndp = NULL;
	if ((slp->ns_flag & SLP_VALID) == 0 ||
	    (m = slp->ns_rec) == (struct mbuf *)0)
		return (ENOBUFS);
	slp->ns_rec = m->m_nextpkt;
	if (slp->ns_rec)
		m->m_nextpkt = (struct mbuf *)0;
	else
		slp->ns_recend = (struct mbuf *)0;
	if (m->m_type == MT_SONAME) {
		nam = m;
		m = m->m_next;
		nam->m_next = NULL;
	} else
		nam = NULL;
	MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
		M_NFSRVDESC, M_WAITOK);
	nd->nd_md = nd->nd_mrep = m;
	nd->nd_nam2 = nam;
	nd->nd_dpos = mtod(m, caddr_t);
	error = nfs_getreq(nd, nfsd, TRUE);
	if (error) {
		m_freem(nam);
		free((caddr_t)nd, M_NFSRVDESC);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}


/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 * running nfsds will go look for the work in the nfssvc_sock list.
 */
void
nfsrv_wakenfsd(slp)
	struct nfssvc_sock *slp;
{
	struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
	for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
		if (nd->nfsd_flag & NFSD_WAITING) {
			nd->nfsd_flag &= ~NFSD_WAITING;
			if (nd->nfsd_slp)
				panic("nfsd wakeup");
			slp->ns_sref++;
			nd->nfsd_slp = slp;
			wakeup((caddr_t)nd);
			return;
		}
	}
	slp->ns_flag |= SLP_DOREC;
	nfsd_head_flag |= NFSD_CHECKSLP;
}
#endif /* NFSSERVER */