nfs_srvsocket.c revision 1.2 1 /* $NetBSD: nfs_srvsocket.c,v 1.2 2009/03/14 14:46:11 dsl Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1991, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
35 */
36
37 /*
38 * Socket operations for use by nfs
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: nfs_srvsocket.c,v 1.2 2009/03/14 14:46:11 dsl Exp $");
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/evcnt.h>
47 #include <sys/callout.h>
48 #include <sys/proc.h>
49 #include <sys/mount.h>
50 #include <sys/kernel.h>
51 #include <sys/kmem.h>
52 #include <sys/mbuf.h>
53 #include <sys/vnode.h>
54 #include <sys/domain.h>
55 #include <sys/protosw.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/syslog.h>
59 #include <sys/tprintf.h>
60 #include <sys/namei.h>
61 #include <sys/signal.h>
62 #include <sys/signalvar.h>
63 #include <sys/kauth.h>
64
65 #include <netinet/in.h>
66 #include <netinet/tcp.h>
67
68 #include <nfs/rpcv2.h>
69 #include <nfs/nfsproto.h>
70 #include <nfs/nfs.h>
71 #include <nfs/xdr_subs.h>
72 #include <nfs/nfsm_subs.h>
73 #include <nfs/nfsmount.h>
74 #include <nfs/nfsnode.h>
75 #include <nfs/nfsrtt.h>
76 #include <nfs/nfs_var.h>
77
78 static void nfsrv_wakenfsd_locked(struct nfssvc_sock *);
79
80 int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *,
81 struct nfssvc_sock *, struct lwp *,
82 struct mbuf **) = {
83 nfsrv_null,
84 nfsrv_getattr,
85 nfsrv_setattr,
86 nfsrv_lookup,
87 nfsrv3_access,
88 nfsrv_readlink,
89 nfsrv_read,
90 nfsrv_write,
91 nfsrv_create,
92 nfsrv_mkdir,
93 nfsrv_symlink,
94 nfsrv_mknod,
95 nfsrv_remove,
96 nfsrv_rmdir,
97 nfsrv_rename,
98 nfsrv_link,
99 nfsrv_readdir,
100 nfsrv_readdirplus,
101 nfsrv_statfs,
102 nfsrv_fsinfo,
103 nfsrv_pathconf,
104 nfsrv_commit,
105 nfsrv_noop
106 };
107
108 /*
109 * Socket upcall routine for the nfsd sockets.
110 * The void *arg is a pointer to the "struct nfssvc_sock".
111 */
112 void
113 nfsrv_soupcall(struct socket *so, void *arg, int waitflag)
114 {
115 struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
116
117 nfsdsock_setbits(slp, SLP_A_NEEDQ);
118 nfsrv_wakenfsd(slp);
119 }
120
121 void
122 nfsrv_rcv(struct nfssvc_sock *slp)
123 {
124 struct socket *so;
125 struct mbuf *m;
126 struct mbuf *mp, *nam;
127 struct uio auio;
128 int flags;
129 int error;
130 int setflags = 0;
131
132 error = nfsdsock_lock(slp, true);
133 if (error) {
134 setflags |= SLP_A_NEEDQ;
135 goto dorecs_unlocked;
136 }
137
138 nfsdsock_clearbits(slp, SLP_A_NEEDQ);
139
140 so = slp->ns_so;
141 if (so->so_type == SOCK_STREAM) {
142 /*
143 * Do soreceive().
144 */
145 auio.uio_resid = 1000000000;
146 /* not need to setup uio_vmspace */
147 flags = MSG_DONTWAIT;
148 error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags);
149 if (error || mp == NULL) {
150 if (error == EWOULDBLOCK)
151 setflags |= SLP_A_NEEDQ;
152 else
153 setflags |= SLP_A_DISCONN;
154 goto dorecs;
155 }
156 m = mp;
157 m_claimm(m, &nfs_mowner);
158 if (slp->ns_rawend) {
159 slp->ns_rawend->m_next = m;
160 slp->ns_cc += 1000000000 - auio.uio_resid;
161 } else {
162 slp->ns_raw = m;
163 slp->ns_cc = 1000000000 - auio.uio_resid;
164 }
165 while (m->m_next)
166 m = m->m_next;
167 slp->ns_rawend = m;
168
169 /*
170 * Now try and parse record(s) out of the raw stream data.
171 */
172 error = nfsrv_getstream(slp, M_WAIT);
173 if (error) {
174 if (error == EPERM)
175 setflags |= SLP_A_DISCONN;
176 else
177 setflags |= SLP_A_NEEDQ;
178 }
179 } else {
180 do {
181 auio.uio_resid = 1000000000;
182 /* not need to setup uio_vmspace */
183 flags = MSG_DONTWAIT;
184 error = (*so->so_receive)(so, &nam, &auio, &mp, NULL,
185 &flags);
186 if (mp) {
187 if (nam) {
188 m = nam;
189 m->m_next = mp;
190 } else
191 m = mp;
192 m_claimm(m, &nfs_mowner);
193 if (slp->ns_recend)
194 slp->ns_recend->m_nextpkt = m;
195 else
196 slp->ns_rec = m;
197 slp->ns_recend = m;
198 m->m_nextpkt = (struct mbuf *)0;
199 }
200 if (error) {
201 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
202 && error != EWOULDBLOCK) {
203 setflags |= SLP_A_DISCONN;
204 goto dorecs;
205 }
206 }
207 } while (mp);
208 }
209 dorecs:
210 nfsdsock_unlock(slp);
211
212 dorecs_unlocked:
213 if (setflags) {
214 nfsdsock_setbits(slp, setflags);
215 }
216 }
217
218 int
219 nfsdsock_lock(struct nfssvc_sock *slp, bool waitok)
220 {
221
222 mutex_enter(&slp->ns_lock);
223 while ((~slp->ns_flags & (SLP_BUSY|SLP_VALID)) == 0) {
224 if (!waitok) {
225 mutex_exit(&slp->ns_lock);
226 return EWOULDBLOCK;
227 }
228 cv_wait(&slp->ns_cv, &slp->ns_lock);
229 }
230 if ((slp->ns_flags & SLP_VALID) == 0) {
231 mutex_exit(&slp->ns_lock);
232 return EINVAL;
233 }
234 KASSERT((slp->ns_flags & SLP_BUSY) == 0);
235 slp->ns_flags |= SLP_BUSY;
236 mutex_exit(&slp->ns_lock);
237
238 return 0;
239 }
240
241 void
242 nfsdsock_unlock(struct nfssvc_sock *slp)
243 {
244
245 mutex_enter(&slp->ns_lock);
246 KASSERT((slp->ns_flags & SLP_BUSY) != 0);
247 cv_broadcast(&slp->ns_cv);
248 slp->ns_flags &= ~SLP_BUSY;
249 mutex_exit(&slp->ns_lock);
250 }
251
252 int
253 nfsdsock_drain(struct nfssvc_sock *slp)
254 {
255 int error = 0;
256
257 mutex_enter(&slp->ns_lock);
258 if ((slp->ns_flags & SLP_VALID) == 0) {
259 error = EINVAL;
260 goto done;
261 }
262 slp->ns_flags &= ~SLP_VALID;
263 while ((slp->ns_flags & SLP_BUSY) != 0) {
264 cv_wait(&slp->ns_cv, &slp->ns_lock);
265 }
266 done:
267 mutex_exit(&slp->ns_lock);
268
269 return error;
270 }
271
272 /*
273 * Try and extract an RPC request from the mbuf data list received on a
274 * stream socket. The "waitflag" argument indicates whether or not it
275 * can sleep.
276 */
277 int
278 nfsrv_getstream(slp, waitflag)
279 struct nfssvc_sock *slp;
280 int waitflag;
281 {
282 struct mbuf *m, **mpp;
283 struct mbuf *recm;
284 u_int32_t recmark;
285 int error = 0;
286
287 KASSERT((slp->ns_flags & SLP_BUSY) != 0);
288 for (;;) {
289 if (slp->ns_reclen == 0) {
290 if (slp->ns_cc < NFSX_UNSIGNED) {
291 break;
292 }
293 m = slp->ns_raw;
294 m_copydata(m, 0, NFSX_UNSIGNED, (void *)&recmark);
295 m_adj(m, NFSX_UNSIGNED);
296 slp->ns_cc -= NFSX_UNSIGNED;
297 recmark = ntohl(recmark);
298 slp->ns_reclen = recmark & ~0x80000000;
299 if (recmark & 0x80000000)
300 slp->ns_sflags |= SLP_S_LASTFRAG;
301 else
302 slp->ns_sflags &= ~SLP_S_LASTFRAG;
303 if (slp->ns_reclen > NFS_MAXPACKET) {
304 error = EPERM;
305 break;
306 }
307 }
308
309 /*
310 * Now get the record part.
311 *
312 * Note that slp->ns_reclen may be 0. Linux sometimes
313 * generates 0-length records.
314 */
315 if (slp->ns_cc == slp->ns_reclen) {
316 recm = slp->ns_raw;
317 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
318 slp->ns_cc = slp->ns_reclen = 0;
319 } else if (slp->ns_cc > slp->ns_reclen) {
320 recm = slp->ns_raw;
321 m = m_split(recm, slp->ns_reclen, waitflag);
322 if (m == NULL) {
323 error = EWOULDBLOCK;
324 break;
325 }
326 m_claimm(recm, &nfs_mowner);
327 slp->ns_raw = m;
328 if (m->m_next == NULL)
329 slp->ns_rawend = m;
330 slp->ns_cc -= slp->ns_reclen;
331 slp->ns_reclen = 0;
332 } else {
333 break;
334 }
335
336 /*
337 * Accumulate the fragments into a record.
338 */
339 mpp = &slp->ns_frag;
340 while (*mpp)
341 mpp = &((*mpp)->m_next);
342 *mpp = recm;
343 if (slp->ns_sflags & SLP_S_LASTFRAG) {
344 if (slp->ns_recend)
345 slp->ns_recend->m_nextpkt = slp->ns_frag;
346 else
347 slp->ns_rec = slp->ns_frag;
348 slp->ns_recend = slp->ns_frag;
349 slp->ns_frag = NULL;
350 }
351 }
352
353 return error;
354 }
355
356 /*
357 * Parse an RPC header.
358 */
359 int
360 nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
361 struct nfsrv_descript **ndp, bool *more)
362 {
363 struct mbuf *m, *nam;
364 struct nfsrv_descript *nd;
365 int error;
366
367 *ndp = NULL;
368 *more = false;
369
370 if (nfsdsock_lock(slp, true)) {
371 return ENOBUFS;
372 }
373 m = slp->ns_rec;
374 if (m == NULL) {
375 nfsdsock_unlock(slp);
376 return ENOBUFS;
377 }
378 slp->ns_rec = m->m_nextpkt;
379 if (slp->ns_rec) {
380 m->m_nextpkt = NULL;
381 *more = true;
382 } else {
383 slp->ns_recend = NULL;
384 }
385 nfsdsock_unlock(slp);
386
387 if (m->m_type == MT_SONAME) {
388 nam = m;
389 m = m->m_next;
390 nam->m_next = NULL;
391 } else
392 nam = NULL;
393 nd = nfsdreq_alloc();
394 nd->nd_md = nd->nd_mrep = m;
395 nd->nd_nam2 = nam;
396 nd->nd_dpos = mtod(m, void *);
397 error = nfs_getreq(nd, nfsd, true);
398 if (error) {
399 m_freem(nam);
400 nfsdreq_free(nd);
401 return (error);
402 }
403 *ndp = nd;
404 nfsd->nfsd_nd = nd;
405 return (0);
406 }
407
408 bool
409 nfsrv_timer(void)
410 {
411 struct timeval tv;
412 struct nfssvc_sock *slp;
413 u_quad_t cur_usec;
414 struct nfsrv_descript *nd;
415 bool more;
416
417 /*
418 * Scan the write gathering queues for writes that need to be
419 * completed now.
420 */
421 getmicrotime(&tv);
422 cur_usec = (u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec;
423 more = false;
424 mutex_enter(&nfsd_lock);
425 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
426 nd = LIST_FIRST(&slp->ns_tq);
427 if (nd != NULL) {
428 if (nd->nd_time <= cur_usec) {
429 nfsrv_wakenfsd_locked(slp);
430 }
431 more = true;
432 }
433 }
434 mutex_exit(&nfsd_lock);
435 return more;
436 }
437
438 /*
439 * Search for a sleeping nfsd and wake it up.
440 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
441 * running nfsds will go look for the work in the nfssvc_sock list.
442 */
443 static void
444 nfsrv_wakenfsd_locked(struct nfssvc_sock *slp)
445 {
446 struct nfsd *nd;
447
448 KASSERT(mutex_owned(&nfsd_lock));
449
450 if ((slp->ns_flags & SLP_VALID) == 0)
451 return;
452 if (slp->ns_gflags & SLP_G_DOREC)
453 return;
454 nd = SLIST_FIRST(&nfsd_idle_head);
455 if (nd) {
456 SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle);
457 if (nd->nfsd_slp)
458 panic("nfsd wakeup");
459 slp->ns_sref++;
460 KASSERT(slp->ns_sref > 0);
461 nd->nfsd_slp = slp;
462 cv_signal(&nd->nfsd_cv);
463 } else {
464 slp->ns_gflags |= SLP_G_DOREC;
465 nfsd_head_flag |= NFSD_CHECKSLP;
466 TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending);
467 }
468 }
469
470 void
471 nfsrv_wakenfsd(struct nfssvc_sock *slp)
472 {
473
474 mutex_enter(&nfsd_lock);
475 nfsrv_wakenfsd_locked(slp);
476 mutex_exit(&nfsd_lock);
477 }
478
479 int
480 nfsdsock_sendreply(struct nfssvc_sock *slp, struct nfsrv_descript *nd)
481 {
482 int error;
483
484 if (nd->nd_mrep != NULL) {
485 m_freem(nd->nd_mrep);
486 nd->nd_mrep = NULL;
487 }
488
489 mutex_enter(&slp->ns_lock);
490 if ((slp->ns_flags & SLP_SENDING) != 0) {
491 SIMPLEQ_INSERT_TAIL(&slp->ns_sendq, nd, nd_sendq);
492 mutex_exit(&slp->ns_lock);
493 return 0;
494 }
495 KASSERT(SIMPLEQ_EMPTY(&slp->ns_sendq));
496 slp->ns_flags |= SLP_SENDING;
497 mutex_exit(&slp->ns_lock);
498
499 again:
500 error = nfs_send(slp->ns_so, nd->nd_nam2, nd->nd_mreq, NULL, curlwp);
501 if (nd->nd_nam2) {
502 m_free(nd->nd_nam2);
503 }
504 nfsdreq_free(nd);
505
506 mutex_enter(&slp->ns_lock);
507 KASSERT((slp->ns_flags & SLP_SENDING) != 0);
508 nd = SIMPLEQ_FIRST(&slp->ns_sendq);
509 if (nd != NULL) {
510 SIMPLEQ_REMOVE_HEAD(&slp->ns_sendq, nd_sendq);
511 mutex_exit(&slp->ns_lock);
512 goto again;
513 }
514 slp->ns_flags &= ~SLP_SENDING;
515 mutex_exit(&slp->ns_lock);
516
517 return error;
518 }
519
520 void
521 nfsdsock_setbits(struct nfssvc_sock *slp, int bits)
522 {
523
524 mutex_enter(&slp->ns_alock);
525 slp->ns_aflags |= bits;
526 mutex_exit(&slp->ns_alock);
527 }
528
529 void
530 nfsdsock_clearbits(struct nfssvc_sock *slp, int bits)
531 {
532
533 mutex_enter(&slp->ns_alock);
534 slp->ns_aflags &= ~bits;
535 mutex_exit(&slp->ns_alock);
536 }
537
538 bool
539 nfsdsock_testbits(struct nfssvc_sock *slp, int bits)
540 {
541
542 return (slp->ns_aflags & bits);
543 }
544