/*	$NetBSD: nfs_clntsocket.c,v 1.1.40.2 2016/07/10 09:42:34 martin Exp $	*/
2
3 /*
4 * Copyright (c) 1989, 1991, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
35 */
36
37 /*
38 * Socket operations for use by nfs
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: nfs_clntsocket.c,v 1.1.40.2 2016/07/10 09:42:34 martin Exp $");
43
44 #ifdef _KERNEL_OPT
45 #include "opt_nfs.h"
46 #include "opt_mbuftrace.h"
47 #endif
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/evcnt.h>
52 #include <sys/callout.h>
53 #include <sys/proc.h>
54 #include <sys/mount.h>
55 #include <sys/kernel.h>
56 #include <sys/kmem.h>
57 #include <sys/mbuf.h>
58 #include <sys/vnode.h>
59 #include <sys/domain.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <sys/syslog.h>
64 #include <sys/tprintf.h>
65 #include <sys/namei.h>
66 #include <sys/signal.h>
67 #include <sys/signalvar.h>
68 #include <sys/kauth.h>
69
70 #include <netinet/in.h>
71 #include <netinet/tcp.h>
72
73 #include <nfs/rpcv2.h>
74 #include <nfs/nfsproto.h>
75 #include <nfs/nfs.h>
76 #include <nfs/xdr_subs.h>
77 #include <nfs/nfsm_subs.h>
78 #include <nfs/nfsmount.h>
79 #include <nfs/nfsnode.h>
80 #include <nfs/nfsrtt.h>
81 #include <nfs/nfs_var.h>
82
83 static int nfs_sndlock(struct nfsmount *, struct nfsreq *);
84 static void nfs_sndunlock(struct nfsmount *);
85
86 /*
87 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
88 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
89 * Mark and consolidate the data into a new mbuf list.
90 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
91 * small mbufs.
92 * For SOCK_STREAM we must be very careful to read an entire record once
93 * we have read any of it, even if the system call has been interrupted.
94 */
95 static int
96 nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp,
97 struct lwp *l)
98 {
99 struct socket *so;
100 struct uio auio;
101 struct iovec aio;
102 struct mbuf *m;
103 struct mbuf *control;
104 u_int32_t len;
105 struct mbuf **getnam;
106 int error, sotype, rcvflg;
107
108 /*
109 * Set up arguments for soreceive()
110 */
111 *mp = NULL;
112 *aname = NULL;
113 sotype = rep->r_nmp->nm_sotype;
114
115 /*
116 * For reliable protocols, lock against other senders/receivers
117 * in case a reconnect is necessary.
118 * For SOCK_STREAM, first get the Record Mark to find out how much
119 * more there is to get.
120 * We must lock the socket against other receivers
121 * until we have an entire rpc request/reply.
122 */
123 if (sotype != SOCK_DGRAM) {
124 error = nfs_sndlock(rep->r_nmp, rep);
125 if (error)
126 return (error);
127 tryagain:
128 /*
129 * Check for fatal errors and resending request.
130 */
131 /*
132 * Ugh: If a reconnect attempt just happened, nm_so
133 * would have changed. NULL indicates a failed
134 * attempt that has essentially shut down this
135 * mount point.
136 */
137 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
138 nfs_sndunlock(rep->r_nmp);
139 return (EINTR);
140 }
141 so = rep->r_nmp->nm_so;
142 if (!so) {
143 error = nfs_reconnect(rep);
144 if (error) {
145 nfs_sndunlock(rep->r_nmp);
146 return (error);
147 }
148 goto tryagain;
149 }
150 while (rep->r_flags & R_MUSTRESEND) {
151 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
152 nfsstats.rpcretries++;
153 rep->r_rtt = 0;
154 rep->r_flags &= ~R_TIMING;
155 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l);
156 if (error) {
157 if (error == EINTR || error == ERESTART ||
158 (error = nfs_reconnect(rep)) != 0) {
159 nfs_sndunlock(rep->r_nmp);
160 return (error);
161 }
162 goto tryagain;
163 }
164 }
165 nfs_sndunlock(rep->r_nmp);
166 if (sotype == SOCK_STREAM) {
167 aio.iov_base = (void *) &len;
168 aio.iov_len = sizeof(u_int32_t);
169 auio.uio_iov = &aio;
170 auio.uio_iovcnt = 1;
171 auio.uio_rw = UIO_READ;
172 auio.uio_offset = 0;
173 auio.uio_resid = sizeof(u_int32_t);
174 UIO_SETUP_SYSSPACE(&auio);
175 do {
176 rcvflg = MSG_WAITALL;
177 error = (*so->so_receive)(so, NULL, &auio,
178 NULL, NULL, &rcvflg);
179 if (error == EWOULDBLOCK && rep) {
180 if (rep->r_flags & R_SOFTTERM)
181 return (EINTR);
182 /*
183 * if it seems that the server died after it
184 * received our request, set EPIPE so that
185 * we'll reconnect and retransmit requests.
186 */
187 if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
188 nfsstats.rpctimeouts++;
189 error = EPIPE;
190 }
191 }
192 } while (error == EWOULDBLOCK);
193 if (!error && auio.uio_resid > 0) {
194 /*
195 * Don't log a 0 byte receive; it means
196 * that the socket has been closed, and
197 * can happen during normal operation
198 * (forcible unmount or Solaris server).
199 */
200 if (auio.uio_resid != sizeof (u_int32_t))
201 log(LOG_INFO,
202 "short receive (%lu/%lu) from nfs server %s\n",
203 (u_long)sizeof(u_int32_t) - auio.uio_resid,
204 (u_long)sizeof(u_int32_t),
205 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
206 error = EPIPE;
207 }
208 if (error)
209 goto errout;
210 len = ntohl(len) & ~0x80000000;
211 /*
212 * This is SERIOUS! We are out of sync with the sender
213 * and forcing a disconnect/reconnect is all I can do.
214 */
215 if (len > NFS_MAXPACKET) {
216 log(LOG_ERR, "%s (%d) from nfs server %s\n",
217 "impossible packet length",
218 len,
219 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
220 error = EFBIG;
221 goto errout;
222 }
223 auio.uio_resid = len;
224 do {
225 rcvflg = MSG_WAITALL;
226 error = (*so->so_receive)(so, NULL,
227 &auio, mp, NULL, &rcvflg);
228 } while (error == EWOULDBLOCK || error == EINTR ||
229 error == ERESTART);
230 if (!error && auio.uio_resid > 0) {
231 if (len != auio.uio_resid)
232 log(LOG_INFO,
233 "short receive (%lu/%d) from nfs server %s\n",
234 (u_long)len - auio.uio_resid, len,
235 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
236 error = EPIPE;
237 }
238 } else {
239 /*
240 * NB: Since uio_resid is big, MSG_WAITALL is ignored
241 * and soreceive() will return when it has either a
242 * control msg or a data msg.
243 * We have no use for control msg., but must grab them
244 * and then throw them away so we know what is going
245 * on.
246 */
247 auio.uio_resid = len = 100000000; /* Anything Big */
248 /* not need to setup uio_vmspace */
249 do {
250 rcvflg = 0;
251 error = (*so->so_receive)(so, NULL,
252 &auio, mp, &control, &rcvflg);
253 if (control)
254 m_freem(control);
255 if (error == EWOULDBLOCK && rep) {
256 if (rep->r_flags & R_SOFTTERM)
257 return (EINTR);
258 }
259 } while (error == EWOULDBLOCK ||
260 (!error && *mp == NULL && control));
261 if ((rcvflg & MSG_EOR) == 0)
262 printf("Egad!!\n");
263 if (!error && *mp == NULL)
264 error = EPIPE;
265 len -= auio.uio_resid;
266 }
267 errout:
268 if (error && error != EINTR && error != ERESTART) {
269 m_freem(*mp);
270 *mp = NULL;
271 if (error != EPIPE)
272 log(LOG_INFO,
273 "receive error %d from nfs server %s\n",
274 error,
275 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
276 error = nfs_sndlock(rep->r_nmp, rep);
277 if (!error)
278 error = nfs_reconnect(rep);
279 if (!error)
280 goto tryagain;
281 else
282 nfs_sndunlock(rep->r_nmp);
283 }
284 } else {
285 if ((so = rep->r_nmp->nm_so) == NULL)
286 return (EACCES);
287 if (so->so_state & SS_ISCONNECTED)
288 getnam = NULL;
289 else
290 getnam = aname;
291 auio.uio_resid = len = 1000000;
292 /* not need to setup uio_vmspace */
293 do {
294 rcvflg = 0;
295 error = (*so->so_receive)(so, getnam, &auio, mp,
296 NULL, &rcvflg);
297 if (error == EWOULDBLOCK &&
298 (rep->r_flags & R_SOFTTERM))
299 return (EINTR);
300 } while (error == EWOULDBLOCK);
301 len -= auio.uio_resid;
302 if (!error && *mp == NULL)
303 error = EPIPE;
304 }
305 if (error) {
306 m_freem(*mp);
307 *mp = NULL;
308 }
309 return (error);
310 }
311
/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 *
 * Returns 0 once myrep has been matched with its reply (either by us or,
 * via EALREADY from nfs_rcvlock(), by another receiver on our behalf);
 * otherwise returns an errno.
 */
/* ARGSUSED */
static int
nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	char *dpos, *cp2;		/* t1, cp2 are used by nfsm_dissect() */
	int error, s;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(nmp, myrep);
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket
		 */

		/* Count ourselves so a concurrent disconnect waits for us. */
		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters++;
		mutex_exit(&nmp->nm_lock);

		error = nfs_receive(myrep, &nam, &mrep, lwp);

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters--;
		cv_signal(&nmp->nm_disconcv);
		mutex_exit(&nmp->nm_lock);

		if (error) {
			nfs_rcvunlock(nmp);

			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				return error;
			}
			/*
			 * Ignore routing errors on connectionless protocols? ?
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				if (ratecheck(&nfs_reply_last_err_time,
				    &nfs_err_interval))
					printf("%s: ignoring error %d\n",
					       __func__, error);
#endif
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, void *);
		/* NB: nfsm_dissect() jumps to nfsmout on a parse failure. */
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(mrep);
nfsmout:
			nfs_rcvunlock(nmp);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		s = splsoftnet();
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				/* Optionally log round-trip statistics. */
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, nfs_proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[nfs_proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[nfs_proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
					getmicrotime(&rt->tstamp);
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					    (NFS_CWNDSCALE * NFS_CWNDSCALE +
					    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so course, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
				nmp->nm_timeouts = 0;
				break;
			}
		}
		splx(s);
		nfs_rcvunlock(nmp);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}
485
/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 *
 * np      => nfsnode the request operates on
 * mrest   => request arguments as an mbuf chain (always consumed)
 * procnum => NFS procedure number
 * lwp     => calling lwp (NULL or curlwp)
 * cred    => credentials to issue the RPC with
 * mrp, mdp, dposp => out: reply mbuf chain and parse position
 * rexmitp => out (optional): nonzero if the request was retransmitted
 */
int
nfs_request(struct nfsnode *np, struct mbuf *mrest, int procnum,
    struct lwp *lwp, kauth_cred_t cred, struct mbuf **mrp,
    struct mbuf **mdp, char **dposp, int *rexmitp)
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	char *dpos, *cp2;		/* t1, cp2 are used by nfsm_* macros */
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	kauth_cred_t acred;		/* alternate cred for the retry dance */
	struct mbuf *mrest_backup = NULL;
	kauth_cred_t origcred = NULL; /* XXX: gcc */
	bool retry_cred = true;
	bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;

	if (rexmitp != NULL)
		*rexmitp = 0;

	acred = kauth_cred_alloc();

tryagain_cred:
	KASSERT(cred != NULL);
	rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
	rep->r_nmp = nmp;
	KASSERT(lwp == NULL || lwp == curlwp);
	rep->r_lwp = lwp;
	rep->r_procnum = procnum;
	/* Sum the total length of the request mbuf chain. */
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = NULL;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((void *)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			if (error) {
				kmem_free(rep, sizeof(*rep));
				m_freem(mrest);
				KASSERT(kauth_cred_getrefcnt(acred) == 1);
				kauth_cred_free(acred);
				return (error);
			}
		}
		retry_cred = false;
	} else {
		/* AUTH_UNIX */
		uid_t uid;
		gid_t gid;

		/*
		 * on the most unix filesystems, permission checks are
		 * done when the file is open(2)'ed.
		 * ie. once a file is successfully open'ed,
		 * following i/o operations never fail with EACCES.
		 * we try to follow the semantics as far as possible.
		 *
		 * note that we expect that the nfs server always grant
		 * accesses by the file's owner.
		 */
		origcred = cred;
		switch (procnum) {
		case NFSPROC_READ:
		case NFSPROC_WRITE:
		case NFSPROC_COMMIT:
			uid = np->n_vattr->va_uid;
			gid = np->n_vattr->va_gid;
			if (kauth_cred_geteuid(cred) == uid &&
			    kauth_cred_getegid(cred) == gid) {
				retry_cred = false;
				break;
			}
			if (use_opencred)
				break;
			/* Masquerade as the file owner for the first try. */
			kauth_cred_setuid(acred, uid);
			kauth_cred_seteuid(acred, uid);
			kauth_cred_setsvuid(acred, uid);
			kauth_cred_setgid(acred, gid);
			kauth_cred_setegid(acred, gid);
			kauth_cred_setsvgid(acred, gid);
			cred = acred;
			break;
		default:
			retry_cred = false;
			break;
		}
		/*
		 * backup mbuf chain if we can need it later to retry.
		 *
		 * XXX maybe we can keep a direct reference to
		 * mrest without doing m_copym, but it's ...ugly.
		 */
		if (retry_cred)
			mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
		auth_type = RPCAUTH_UNIX;
		/* XXX elad - ngroups */
		auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
		    nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
		    5 * NFSX_UNSIGNED;
	}
	/* nfsm_rpchead() consumes mrest and assigns us an xid. */
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (nfs_proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
	nfs_timer_start();

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(nmp, rep);
		if (!error) {
			/* Send a copy; r_mreq is kept for retransmission. */
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(nmp);
		}
		s = splsoftnet();
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			if ((rep->r_flags & R_SENT) == 0) {
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}
		}
		splx(s);
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE || error == EWOULDBLOCK)
		error = nfs_reply(rep, lwp);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}
	splx(s);

	if (rexmitp != NULL) {
		int rexmit;

		if (nmp->nm_sotype != SOCK_DGRAM)
			rexmit = (rep->r_flags & R_REXMITTED) != 0;
		else
			rexmit = rep->r_rexmit;
		*rexmitp = rexmit;
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error)
		goto nfsmout;

	/*
	 * break down the rpc header and check if ok
	 * (nfsm_dissect()/nfsm_adv() jump to nfsmout on parse failure)
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				/* One retry with fresh Kerberos credentials. */
				failed_auth++;
				mheadend->m_next = NULL;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		goto nfsmout;
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			/* Map the wire NFS error number to a local errno. */
			error = fxdr_unsigned(int, *tl);
			switch (error) {
			case NFSERR_PERM:
				error = EPERM;
				break;

			case NFSERR_NOENT:
				error = ENOENT;
				break;

			case NFSERR_IO:
				error = EIO;
				break;

			case NFSERR_NXIO:
				error = ENXIO;
				break;

			case NFSERR_ACCES:
				error = EACCES;
				if (!retry_cred)
					break;
				/*
				 * Access denied with our first credential
				 * choice: flip between owner cred and open
				 * cred and retry the whole request once.
				 */
				m_freem(mrep);
				m_freem(rep->r_mreq);
				kmem_free(rep, sizeof(*rep));
				use_opencred = !use_opencred;
				if (mrest_backup == NULL) {
					/* m_copym failure */
					KASSERT(
					    kauth_cred_getrefcnt(acred) == 1);
					kauth_cred_free(acred);
					return ENOMEM;
				}
				mrest = mrest_backup;
				mrest_backup = NULL;
				cred = origcred;
				error = 0;
				retry_cred = false;
				goto tryagain_cred;

			case NFSERR_EXIST:
				error = EEXIST;
				break;

			case NFSERR_XDEV:
				error = EXDEV;
				break;

			case NFSERR_NODEV:
				error = ENODEV;
				break;

			case NFSERR_NOTDIR:
				error = ENOTDIR;
				break;

			case NFSERR_ISDIR:
				error = EISDIR;
				break;

			case NFSERR_INVAL:
				error = EINVAL;
				break;

			case NFSERR_FBIG:
				error = EFBIG;
				break;

			case NFSERR_NOSPC:
				error = ENOSPC;
				break;

			case NFSERR_ROFS:
				error = EROFS;
				break;

			case NFSERR_MLINK:
				error = EMLINK;
				break;

			case NFSERR_TIMEDOUT:
				error = ETIMEDOUT;
				break;

			case NFSERR_NAMETOL:
				error = ENAMETOOLONG;
				break;

			case NFSERR_NOTEMPTY:
				error = ENOTEMPTY;
				break;

			case NFSERR_DQUOT:
				error = EDQUOT;
				break;

			case NFSERR_STALE:
				/*
				 * If the File Handle was stale, invalidate the
				 * lookup cache, just in case.
				 */
				error = ESTALE;
				cache_purge(NFSTOV(np));
				break;

			case NFSERR_REMOTE:
				error = EREMOTE;
				break;

			case NFSERR_WFLUSH:
			case NFSERR_BADHANDLE:
			case NFSERR_NOT_SYNC:
			case NFSERR_BAD_COOKIE:
				error = EINVAL;
				break;

			case NFSERR_NOTSUPP:
				error = ENOTSUP;
				break;

			case NFSERR_TOOSMALL:
			case NFSERR_SERVERFAULT:
			case NFSERR_BADTYPE:
				error = EINVAL;
				break;

			case NFSERR_TRYLATER:
				/* v3 JUKEBOX: back off and reissue. */
				if ((nmp->nm_flag & NFSMNT_NFSV3) == 0)
					break;
				m_freem(mrep);
				error = 0;
				waituntil = time_second + trylater_delay;
				while (time_second < waituntil) {
					kpause("nfstrylater", false, hz, NULL);
				}
				trylater_delay *= NFS_TRYLATERDELMUL;
				if (trylater_delay > NFS_TRYLATERDELMAX)
					trylater_delay = NFS_TRYLATERDELMAX;
				/*
				 * RFC1813:
				 * The client should wait and then try
				 * the request with a new RPC transaction ID.
				 */
				nfs_renewxid(rep);
				goto tryagain;

			default:
#ifdef DIAGNOSTIC
				printf("Invalid rpc error code %d\n", error);
#endif
				error = EINVAL;
				break;
			}

			if (nmp->nm_flag & NFSMNT_NFSV3) {
				/*
				 * v3 replies carry post-op attributes even
				 * on error; hand the reply to the caller.
				 */
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			goto nfsmout;
		}

		/*
		 * note which credential worked to minimize number of retries.
		 */
		if (use_opencred)
			np->n_flag |= NUSEOPENCRED;
		else
			np->n_flag &= ~NUSEOPENCRED;

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;

		KASSERT(error == 0);
		goto nfsmout;
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	KASSERT(kauth_cred_getrefcnt(acred) == 1);
	kauth_cred_free(acred);
	m_freem(rep->r_mreq);
	kmem_free(rep, sizeof(*rep));
	m_freem(mrest_backup);
	return (error);
}
957
958 /*
959 * Lock a socket against others.
960 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
961 * and also to avoid race conditions between the processes with nfs requests
962 * in progress when a reconnect is necessary.
963 */
964 static int
965 nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
966 {
967 struct lwp *l;
968 int timeo = 0;
969 bool catch = false;
970 int error = 0;
971
972 if (nmp->nm_flag & NFSMNT_SOFT)
973 timeo = nmp->nm_retry * nmp->nm_timeo;
974
975 if (nmp->nm_iflag & NFSMNT_DISMNTFORCE)
976 timeo = hz;
977
978 if (rep) {
979 l = rep->r_lwp;
980 if (rep->r_nmp->nm_flag & NFSMNT_INT)
981 catch = true;
982 } else
983 l = NULL;
984 mutex_enter(&nmp->nm_lock);
985 while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) {
986 if (rep && nfs_sigintr(rep->r_nmp, rep, l)) {
987 error = EINTR;
988 goto quit;
989 }
990 if (catch) {
991 error = cv_timedwait_sig(&nmp->nm_sndcv,
992 &nmp->nm_lock, timeo);
993 } else {
994 error = cv_timedwait(&nmp->nm_sndcv,
995 &nmp->nm_lock, timeo);
996 }
997
998 if (error) {
999 if ((error == EWOULDBLOCK) &&
1000 (nmp->nm_flag & NFSMNT_SOFT)) {
1001 error = EIO;
1002 goto quit;
1003 }
1004 error = 0;
1005 }
1006 if (catch) {
1007 catch = false;
1008 timeo = 2 * hz;
1009 }
1010 }
1011 nmp->nm_iflag |= NFSMNT_SNDLOCK;
1012 quit:
1013 mutex_exit(&nmp->nm_lock);
1014 return error;
1015 }
1016
1017 /*
1018 * Unlock the stream socket for others.
1019 */
1020 static void
1021 nfs_sndunlock(struct nfsmount *nmp)
1022 {
1023
1024 mutex_enter(&nmp->nm_lock);
1025 if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0)
1026 panic("nfs sndunlock");
1027 nmp->nm_iflag &= ~NFSMNT_SNDLOCK;
1028 cv_signal(&nmp->nm_sndcv);
1029 mutex_exit(&nmp->nm_lock);
1030 }
1031