/*	$NetBSD: nfs_clntsocket.c,v 1.1 2010/03/02 23:19:09 pooka Exp $	*/

/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_clntsocket.c,v 1.1 2010/03/02 23:19:09 pooka Exp $");

#ifdef _KERNEL_OPT
#include "opt_nfs.h"
#include "opt_mbuftrace.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/evcnt.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/namei.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfsrtt.h>
#include <nfs/nfs_var.h>

static int nfs_sndlock(struct nfsmount *, struct nfsreq *);
static void nfs_sndunlock(struct nfsmount *);

/*
 * Receive a Sun RPC Request/Reply.  For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *     small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 */
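/*
 * With RPC record marking, each record on a stream socket is preceded by
 * a 32-bit word in network byte order: the high bit flags the last
 * fragment and the low 31 bits give the fragment length.  nfs_receive()
 * first reads that word, masks off the flag bit and then reads exactly
 * that many bytes of reply data.
 */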
static int
nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp,
    struct lwp *l)
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(rep->r_nmp, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(rep->r_nmp);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(rep->r_nmp);
				return (error);
			}
			goto tryagain;
		}
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			rep->r_rtt = 0;
			rep->r_flags &= ~R_TIMING;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep)) != 0) {
					nfs_sndunlock(rep->r_nmp);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(rep->r_nmp);
		if (sotype == SOCK_STREAM) {
			aio.iov_base = (void *) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			UIO_SETUP_SYSSPACE(&auio);
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL, &auio,
				    NULL, NULL, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					/*
					 * if it seems that the server died after it
					 * received our request, set EPIPE so that
					 * we'll reconnect and retransmit requests.
					 */
					if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
						nfsstats.rpctimeouts++;
						error = EPIPE;
					}
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				/*
				 * Don't log a 0 byte receive; it means
				 * that the socket has been closed, and
				 * can happen during normal operation
				 * (forcible unmount or Solaris server).
				 */
				if (auio.uio_resid != sizeof (u_int32_t))
					log(LOG_INFO,
					    "short receive (%lu/%lu) from nfs server %s\n",
					    (u_long)sizeof(u_int32_t) - auio.uio_resid,
					    (u_long)sizeof(u_int32_t),
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
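			/*
			 * The high bit of the record mark is the RPC
			 * last-fragment flag; mask it off to get the
			 * fragment length.  This code assumes the reply
			 * arrives as a single fragment, so the flag itself
			 * is not checked.
			 */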
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, NULL, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				if (len != auio.uio_resid)
					log(LOG_INFO,
					    "short receive (%lu/%d) from nfs server %s\n",
					    (u_long)len - auio.uio_resid, len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control messages, but must grab
			 * them and then throw them away so we know what is
			 * going on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			/* no need to set up uio_vmspace */
			do {
				rcvflg = 0;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
			    (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = NULL;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(rep->r_nmp, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
			else
				nfs_sndunlock(rep->r_nmp);
		}
	} else {
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		/* no need to set up uio_vmspace */
		do {
			rcvflg = 0;
			error = (*so->so_receive)(so, getnam, &auio, mp,
			    NULL, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
		if (!error && *mp == NULL)
			error = EPIPE;
	}
	if (error) {
		m_freem(*mp);
		*mp = NULL;
	}
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
static int
nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	char *dpos, *cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
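		/*
		 * nfs_rcvlock() returning EALREADY is taken to mean that
		 * our reply has already been received and matched by
		 * another receiver (r_mrep is set), so we are done.
		 */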
		error = nfs_rcvlock(nmp, myrep);
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket
		 */

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters++;
		mutex_exit(&nmp->nm_lock);

		error = nfs_receive(myrep, &nam, &mrep, lwp);

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters--;
		cv_signal(&nmp->nm_disconcv);
		mutex_exit(&nmp->nm_lock);

		if (error) {
			nfs_rcvunlock(nmp);

			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				return error;
			}
			/*
			 * Ignore routing errors on connectionless protocols?
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				if (ratecheck(&nfs_reply_last_err_time,
				    &nfs_err_interval))
					printf("%s: ignoring error %d\n",
					    __func__, error);
#endif
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, void *);
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(mrep);
nfsmout:
			nfs_rcvunlock(nmp);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply.
		 * If no match, just drop the datagram.
		 */
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, nfs_proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[nfs_proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[nfs_proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
					getmicrotime(&rt->tstamp);
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
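				/*
				 * nm_cwnd and nm_sent are kept scaled by
				 * NFS_CWNDSCALE (one scaled unit per
				 * outstanding rpc), so adding roughly
				 * NFS_CWNDSCALE*NFS_CWNDSCALE/nm_cwnd per
				 * reply (the nm_cwnd/2 term is for rounding)
				 * grows the window by about one rpc per
				 * round trip.
				 */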
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					    (NFS_CWNDSCALE * NFS_CWNDSCALE +
					    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
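					/*
					 * The smoothed rtt and deviation are
					 * kept in fixed point: NFS_SRTT()
					 * effectively holds 8 times the mean
					 * and NFS_SDRTT() 4 times the
					 * deviation, so the >>3 and >>2
					 * shifts below give the 0.125 and
					 * 0.25 gains noted above.
					 */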
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
				nmp->nm_timeouts = 0;
				break;
			}
		}
		nfs_rcvunlock(nmp);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == NULL) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
int
nfs_request(struct nfsnode *np, struct mbuf *mrest, int procnum, struct lwp *lwp, kauth_cred_t cred, struct mbuf **mrp, struct mbuf **mdp, char **dposp, int *rexmitp)
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	char *dpos, *cp2;
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	kauth_cred_t acred;
	struct mbuf *mrest_backup = NULL;
	kauth_cred_t origcred = NULL; /* XXX: gcc */
	bool retry_cred = true;
	bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;

	if (rexmitp != NULL)
		*rexmitp = 0;

	acred = kauth_cred_alloc();

tryagain_cred:
	KASSERT(cred != NULL);
	rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
	rep->r_nmp = nmp;
	KASSERT(lwp == NULL || lwp == curlwp);
	rep->r_lwp = lwp;
	rep->r_procnum = procnum;
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = NULL;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((void *)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
		    &auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
			    &auth_len, verf_str, &verf_len, key);
			if (error) {
				kmem_free(rep, sizeof(*rep));
				m_freem(mrest);
				KASSERT(kauth_cred_getrefcnt(acred) == 1);
				kauth_cred_free(acred);
				return (error);
			}
		}
		retry_cred = false;
	} else {
		/* AUTH_UNIX */
		uid_t uid;
		gid_t gid;

		/*
		 * On most unix filesystems, permission checks are
		 * done when the file is open(2)'ed.
		 * i.e. once a file is successfully opened,
		 * subsequent i/o operations never fail with EACCES.
		 * We try to follow those semantics as far as possible.
		 *
		 * Note that we expect the nfs server to always grant
		 * access to the file's owner.
		 */
		origcred = cred;
		switch (procnum) {
		case NFSPROC_READ:
		case NFSPROC_WRITE:
		case NFSPROC_COMMIT:
			uid = np->n_vattr->va_uid;
			gid = np->n_vattr->va_gid;
			if (kauth_cred_geteuid(cred) == uid &&
			    kauth_cred_getegid(cred) == gid) {
				retry_cred = false;
				break;
			}
			if (use_opencred)
				break;
			kauth_cred_setuid(acred, uid);
			kauth_cred_seteuid(acred, uid);
			kauth_cred_setsvuid(acred, uid);
			kauth_cred_setgid(acred, gid);
			kauth_cred_setegid(acred, gid);
			kauth_cred_setsvgid(acred, gid);
			cred = acred;
			break;
		default:
			retry_cred = false;
			break;
		}
		/*
		 * Back up the mbuf chain in case we need it later
		 * for a retry.
		 *
		 * XXX maybe we can keep a direct reference to
		 * mrest without doing m_copym, but it's ...ugly.
		 */
		if (retry_cred)
			mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
		auth_type = RPCAUTH_UNIX;
		/* XXX elad - ngroups */
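		/*
		 * The AUTH_UNIX credential body is 5 32-bit words (stamp,
		 * machine name length, uid, gid, group count -- the machine
		 * name is presumably sent zero-length by nfsm_rpchead())
		 * plus 4 bytes per supplementary group, with the group list
		 * clamped to nm_numgrps.
		 */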
		auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
		    nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
		    5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	    auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
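	/*
	 * The mark is a single word: the last-fragment bit (0x80000000)
	 * or'ed with the length of the request that follows, not counting
	 * the mark itself.
	 */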
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (nfs_proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
	nfs_timer_start();

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(nmp, rep);
		if (!error) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(nmp);
		}
		s = splsoftnet();
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			if ((rep->r_flags & R_SENT) == 0) {
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}
		}
		splx(s);
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE || error == EWOULDBLOCK)
		error = nfs_reply(rep, lwp);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}
	splx(s);

	if (rexmitp != NULL) {
		int rexmit;

		if (nmp->nm_sotype != SOCK_DGRAM)
			rexmit = (rep->r_flags & R_REXMITTED) != 0;
		else
			rexmit = rep->r_rexmit;
		*rexmitp = rexmit;
	}

	/*
	 * If there was a successful reply and a tprintf msg,
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error)
		goto nfsmout;

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				failed_auth++;
				mheadend->m_next = NULL;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		goto nfsmout;
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			switch (error) {
			case NFSERR_PERM:
				error = EPERM;
				break;

			case NFSERR_NOENT:
				error = ENOENT;
				break;

			case NFSERR_IO:
				error = EIO;
				break;

			case NFSERR_NXIO:
				error = ENXIO;
				break;

			case NFSERR_ACCES:
				error = EACCES;
				if (!retry_cred)
					break;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				kmem_free(rep, sizeof(*rep));
				use_opencred = !use_opencred;
				if (mrest_backup == NULL) {
					/* m_copym failure */
					KASSERT(
					    kauth_cred_getrefcnt(acred) == 1);
					kauth_cred_free(acred);
					return ENOMEM;
				}
				mrest = mrest_backup;
				mrest_backup = NULL;
				cred = origcred;
				error = 0;
				retry_cred = false;
				goto tryagain_cred;

			case NFSERR_EXIST:
				error = EEXIST;
				break;

			case NFSERR_XDEV:
				error = EXDEV;
				break;

			case NFSERR_NODEV:
				error = ENODEV;
				break;

			case NFSERR_NOTDIR:
				error = ENOTDIR;
				break;

			case NFSERR_ISDIR:
				error = EISDIR;
				break;

			case NFSERR_INVAL:
				error = EINVAL;
				break;

			case NFSERR_FBIG:
				error = EFBIG;
				break;

			case NFSERR_NOSPC:
				error = ENOSPC;
				break;

			case NFSERR_ROFS:
				error = EROFS;
				break;

			case NFSERR_MLINK:
				error = EMLINK;
				break;

			case NFSERR_TIMEDOUT:
				error = ETIMEDOUT;
				break;

			case NFSERR_NAMETOL:
				error = ENAMETOOLONG;
				break;

			case NFSERR_NOTEMPTY:
				error = ENOTEMPTY;
				break;

			case NFSERR_DQUOT:
				error = EDQUOT;
				break;

			case NFSERR_STALE:
				/*
				 * If the File Handle was stale, invalidate the
				 * lookup cache, just in case.
				 */
				error = ESTALE;
				cache_purge(NFSTOV(np));
				break;

			case NFSERR_REMOTE:
				error = EREMOTE;
				break;

			case NFSERR_WFLUSH:
			case NFSERR_BADHANDLE:
			case NFSERR_NOT_SYNC:
			case NFSERR_BAD_COOKIE:
				error = EINVAL;
				break;

			case NFSERR_NOTSUPP:
				error = ENOTSUP;
				break;

			case NFSERR_TOOSMALL:
			case NFSERR_SERVERFAULT:
			case NFSERR_BADTYPE:
				error = EINVAL;
				break;

			case NFSERR_TRYLATER:
				if ((nmp->nm_flag & NFSMNT_NFSV3) == 0)
					break;
				m_freem(mrep);
				error = 0;
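				/*
				 * Back off: the delay starts at
				 * NFS_TRYLATERDEL seconds and is multiplied
				 * by NFS_TRYLATERDELMUL on each
				 * NFSERR_TRYLATER reply, capped at
				 * NFS_TRYLATERDELMAX.
				 */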
				waituntil = time_second + trylater_delay;
				while (time_second < waituntil) {
					kpause("nfstrylater", false, hz, NULL);
				}
				trylater_delay *= NFS_TRYLATERDELMUL;
				if (trylater_delay > NFS_TRYLATERDELMAX)
					trylater_delay = NFS_TRYLATERDELMAX;
				/*
				 * RFC1813:
				 * The client should wait and then try
				 * the request with a new RPC transaction ID.
				 */
				nfs_renewxid(rep);
				goto tryagain;

			default:
#ifdef DIAGNOSTIC
				printf("Invalid rpc error code %d\n", error);
#endif
				error = EINVAL;
				break;
			}

			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			goto nfsmout;
		}

		/*
		 * note which credential worked to minimize number of retries.
		 */
		if (use_opencred)
			np->n_flag |= NUSEOPENCRED;
		else
			np->n_flag &= ~NUSEOPENCRED;

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;

		KASSERT(error == 0);
		goto nfsmout;
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	KASSERT(kauth_cred_getrefcnt(acred) == 1);
	kauth_cred_free(acred);
	m_freem(rep->r_mreq);
	kmem_free(rep, sizeof(*rep));
	m_freem(mrest_backup);
	return (error);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
static int
nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
{
	struct lwp *l;
	int timeo = 0;
	bool catch = false;
	int error = 0;

	if (rep) {
		l = rep->r_lwp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			catch = true;
	} else
		l = NULL;
	mutex_enter(&nmp->nm_lock);
	while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) {
		if (rep && nfs_sigintr(rep->r_nmp, rep, l)) {
			error = EINTR;
			goto quit;
		}
		if (catch) {
			cv_timedwait_sig(&nmp->nm_sndcv, &nmp->nm_lock, timeo);
		} else {
			cv_timedwait(&nmp->nm_sndcv, &nmp->nm_lock, timeo);
		}
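		/*
		 * After the first (possibly interruptible) wait, switch to
		 * plain 2 second waits; pending signals are still noticed
		 * via the nfs_sigintr() check at the top of the loop.
		 */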
		if (catch) {
			catch = false;
			timeo = 2 * hz;
		}
	}
	nmp->nm_iflag |= NFSMNT_SNDLOCK;
quit:
	mutex_exit(&nmp->nm_lock);
	return error;
}

/*
 * Unlock the stream socket for others.
 */
static void
nfs_sndunlock(struct nfsmount *nmp)
{

	mutex_enter(&nmp->nm_lock);
	if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	nmp->nm_iflag &= ~NFSMNT_SNDLOCK;
	cv_signal(&nmp->nm_sndcv);
	mutex_exit(&nmp->nm_lock);
}