perfuse.c revision 1.23 1 /* $NetBSD: perfuse.c,v 1.23 2011/10/30 05:11:37 manu Exp $ */
2
3 /*-
4 * Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <errno.h>
34 #include <puffs.h>
35 #include <sys/types.h>
36 #include <sys/mman.h>
37 #include <sys/resource.h>
38 #include <sys/socket.h>
39 #include <sys/extattr.h>
40 #include <sys/un.h>
41 #include <machine/vmparam.h>
42
43 #define LIBPERFUSE
44 #include "perfuse.h"
45 #include "perfuse_if.h"
46 #include "perfuse_priv.h"
47
/*
 * Diagnostic flag mask.  This global is only consulted by the
 * DPRINTF/DERR/DWARN reporting macros and by the debug checks below.
 */
int perfuse_diagflags = 0;

/* Environment of the current process, handed to execve() below. */
extern char **environ;

/* Helpers private to this file. */
static int get_fd(const char *);
static struct perfuse_state *init_state(void);
53
54
55 static struct perfuse_state *
56 init_state(void)
57 {
58 struct perfuse_state *ps;
59 char opts[1024];
60
61 if ((ps = malloc(sizeof(*ps))) == NULL)
62 DERR(EX_OSERR, "%s: malloc failed", __func__);
63
64 (void)memset(ps, 0, sizeof(*ps));
65 ps->ps_max_write = UINT_MAX;
66 ps->ps_max_readahead = UINT_MAX;
67
68 /*
69 * Most of the time, access() is broken because the filesystem
70 * performs the check with root privileges. glusterfs will do that
71 * if the Linux-specific setfsuid() is missing, for instance.
72 */
73 ps->ps_flags |= PS_NO_ACCESS;
74
75 /*
76 * This is a temporary way to toggle access and creat usage.
77 * It would be nice if that could be provided as mount options,
78 * but that will not be obvious to do.
79 */
80 if (getenv_r("PERFUSE_OPTIONS", opts, sizeof(opts)) != -1) {
81 char *optname;
82 char *last;
83
84 for ((optname = strtok_r(opts, ",", &last));
85 optname != NULL;
86 (optname = strtok_r(NULL, ",", &last))) {
87 if (strcmp(optname, "enable_access") == 0)
88 ps->ps_flags &= ~PS_NO_ACCESS;
89
90 if (strcmp(optname, "disable_access") == 0)
91 ps->ps_flags |= PS_NO_ACCESS;
92
93 if (strcmp(optname, "enable_creat") == 0)
94 ps->ps_flags &= ~PS_NO_CREAT;
95
96 if (strcmp(optname, "disable_creat") == 0)
97 ps->ps_flags |= PS_NO_CREAT;
98 }
99 }
100
101
102 return ps;
103 }
104
105
/*
 * Extract the file descriptor from a comma-separated mount option string,
 * i.e. the value of its first "fd=<number>" option.
 *
 * data: option string such as "rw,fd=7,user_id=0"; may be NULL.
 *
 * Returns the descriptor (>= 0) on success.  Returns -1 with errno set to
 * EINVAL when data is NULL, when no option starts with "fd=", or when the
 * value of the first such option is not a plain non-negative decimal
 * number that fits in an int.  Only the first "fd=" option is considered.
 *
 * The string is scanned in place, so no memory is allocated.
 */
static int
get_fd(const char *data)
{
	static const char fdopt[] = "fd=";
	const size_t fdoptlen = sizeof(fdopt) - 1;
	const char *opt;
	const char *next;

	for (opt = data; opt != NULL; opt = next) {
		const char *sep = strchr(opt, ',');
		const char *num;
		char *end;
		long val;

		next = (sep != NULL) ? sep + 1 : NULL;

		if (strncmp(opt, fdopt, fdoptlen) != 0)
			continue;

		/*
		 * Require a digit right away: strtol() alone would accept
		 * leading blanks or a sign, and atoi() used to turn garbage
		 * such as "fd=abc" into descriptor 0.
		 */
		num = opt + fdoptlen;
		if (*num < '0' || *num > '9')
			break;

		errno = 0;
		val = strtol(num, &end, 10);
		if (errno != 0 || val > INT_MAX ||
		    (*end != ',' && *end != '\0'))
			break;

		return (int)val;
	}

	/*
	 * No usable file descriptor found
	 */
	errno = EINVAL;
	return -1;
}
138
139 int
140 perfuse_open(path, flags, mode)
141 const char *path;
142 int flags;
143 mode_t mode;
144 {
145 int sv[2];
146 struct sockaddr_un sun;
147 struct sockaddr *sa;
148 char progname[] = _PATH_PERFUSED;
149 char minus_i[] = "-i";
150 char fdstr[16];
151 char *const argv[] = { progname, minus_i, fdstr, NULL};
152 uint32_t opt;
153 uint32_t optlen;
154 int sock_type = SOCK_SEQPACKET;
155
156 if (strcmp(path, _PATH_FUSE) != 0)
157 return open(path, flags, mode);
158
159 /*
160 * Try SOCK_SEQPACKET then SOCK_DGRAM if unavailable
161 */
162 if ((sv[0] = socket(PF_LOCAL, SOCK_SEQPACKET, 0)) == -1) {
163 sock_type = SOCK_DGRAM;
164 DWARNX("SEQPACKET local sockets unavailable, using less "
165 "reliable DGRAM sockets. Expect file operation hangs.");
166
167 if ((sv[0] = socket(PF_LOCAL, SOCK_DGRAM, 0)) == -1) {
168 #ifdef PERFUSE_DEBUG
169 DWARN("%s: %d socket failed", __func__, __LINE__);
170 #endif
171 return -1;
172 }
173 }
174
175 /*
176 * Set a buffer lentgh large enough so that any FUSE packet
177 * will fit.
178 */
179 opt = (uint32_t)FUSE_BUFSIZE;
180 optlen = sizeof(opt);
181 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
182 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
183
184 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
185 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
186
187 sa = (struct sockaddr *)(void *)&sun;
188 sun.sun_len = sizeof(sun);
189 sun.sun_family = AF_LOCAL;
190 (void)strcpy(sun.sun_path, path);
191
192 if (connect(sv[0], sa, (socklen_t)sun.sun_len) == 0)
193 return sv[0];
194
195 /*
196 * Attempt to run perfused on our own
197 * if it does not run yet; In that case
198 * we will talk using a socketpair
199 * instead of /dev/fuse.
200 */
201 if (socketpair(PF_LOCAL, sock_type, 0, sv) != 0) {
202 DWARN("%s:%d: socketpair failed", __func__, __LINE__);
203 return -1;
204 }
205
206 /*
207 * Set a buffer lentgh large enough so that any FUSE packet
208 * will fit.
209 */
210 opt = (uint32_t)(4 * FUSE_BUFSIZE);
211 optlen = sizeof(opt);
212 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
213 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
214
215 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
216 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
217
218 if (setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
219 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
220
221 if (setsockopt(sv[1], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
222 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
223
224 /*
225 * Request peer credentials. This musr be done before first
226 * frame is sent.
227 */
228 opt = 1;
229 optlen = sizeof(opt);
230 if (setsockopt(sv[1], 0, LOCAL_CREDS, &opt, optlen) != 0)
231 DWARN("%s: setsockopt LOCAL_CREDS failed", __func__);
232
233 (void)sprintf(fdstr, "%d", sv[1]);
234
235 switch(fork()) {
236 case -1:
237 #ifdef PERFUSE_DEBUG
238 DWARN("%s:%d: fork failed", __func__, __LINE__);
239 #endif
240 return -1;
241 /* NOTREACHED */
242 break;
243 case 0:
244 (void)execve(argv[0], argv, environ);
245 #ifdef PERFUSE_DEBUG
246 DWARN("%s:%d: execve failed", __func__, __LINE__);
247 #endif
248 return -1;
249 /* NOTREACHED */
250 break;
251 default:
252 break;
253 }
254
255 return sv[0];
256 }
257
258 int
259 perfuse_mount(source, target, filesystemtype, mountflags, data)
260 const char *source;
261 const char *target;
262 const char *filesystemtype;
263 long mountflags;
264 const void *data;
265 {
266 int s;
267 size_t len;
268 struct perfuse_mount_out *pmo;
269 struct sockaddr_storage ss;
270 struct sockaddr_un *sun;
271 struct sockaddr *sa;
272 socklen_t sa_len;
273 size_t sock_len;
274 char *frame;
275 char *cp;
276
277 #ifdef PERFUSE_DEBUG
278 if (perfuse_diagflags & PDF_MISC)
279 DPRINTF("%s(\"%s\", \"%s\", \"%s\", 0x%lx, \"%s\")\n",
280 __func__, source, target, filesystemtype,
281 mountflags, (const char *)data);
282 #endif
283
284 if ((s = get_fd(data)) == -1)
285 return -1;
286
287 /*
288 * If we are connected to /dev/fuse, we need a second
289 * socket to get replies from perfused.
290 * XXX This socket is not removed at exit time yet
291 */
292 sock_len = 0;
293 sa = (struct sockaddr *)(void *)&ss;
294 sun = (struct sockaddr_un *)(void *)&ss;
295 sa_len = sizeof(ss);
296 if ((getpeername(s, sa, &sa_len) == 0) &&
297 (sa->sa_family = AF_LOCAL) &&
298 (strcmp(sun->sun_path, _PATH_FUSE) == 0)) {
299
300 sun->sun_len = sizeof(*sun);
301 sun->sun_family = AF_LOCAL;
302 (void)sprintf(sun->sun_path, "%s/%s-%d",
303 _PATH_TMP, getprogname(), getpid());
304
305 if (bind(s, sa, (socklen_t)sa->sa_len) != 0)
306 DERR(EX_OSERR, "%s:%d bind to \"%s\" failed",
307 __func__, __LINE__, sun->sun_path);
308
309 sock_len = strlen(sun->sun_path) + 1;
310 }
311
312 len = sizeof(*pmo);
313 len += source ? (uint32_t)strlen(source) + 1 : 0;
314 len += target ? (uint32_t)strlen(target) + 1 : 0;
315 len += filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
316 len += data ? (uint32_t)strlen(data) + 1 : 0;
317 len += sock_len;
318
319 if ((frame = malloc(len)) == NULL) {
320 #ifdef PERFUSE_DEBUG
321 if (perfuse_diagflags & PDF_MISC)
322 DWARN("%s:%d malloc failed", __func__, __LINE__);
323 #endif
324 return -1;
325 }
326
327 pmo = (struct perfuse_mount_out *)(void *)frame;
328 pmo->pmo_len = (uint32_t)len;
329 pmo->pmo_error = 0;
330 pmo->pmo_unique = (uint64_t)-1;
331 (void)strcpy(pmo->pmo_magic, PERFUSE_MOUNT_MAGIC);
332
333 pmo->pmo_source_len = source ? (uint32_t)strlen(source) + 1 : 0;
334 pmo->pmo_target_len = target ? (uint32_t)strlen(target) + 1: 0;
335 pmo->pmo_filesystemtype_len =
336 filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
337 pmo->pmo_mountflags = (uint32_t)mountflags;
338 pmo->pmo_data_len = data ? (uint32_t)strlen(data) + 1 : 0;
339 pmo->pmo_sock_len = (uint32_t)sock_len;
340
341 cp = (char *)(void *)(pmo + 1);
342
343 if (source) {
344 (void)strcpy(cp, source);
345 cp += pmo->pmo_source_len;
346 }
347
348 if (target) {
349 (void)strcpy(cp, target);
350 cp += pmo->pmo_target_len;
351 }
352
353 if (filesystemtype) {
354 (void)strcpy(cp, filesystemtype);
355 cp += pmo->pmo_filesystemtype_len;
356 }
357
358 if (data) {
359 (void)strcpy(cp, data);
360 cp += pmo->pmo_data_len;
361 }
362
363 if (sock_len != 0) {
364 (void)strcpy(cp, sun->sun_path);
365 cp += pmo->pmo_sock_len;
366 }
367
368 if (send(s, frame, len, MSG_NOSIGNAL) != (ssize_t)len) {
369 #ifdef PERFUSE_DEBUG
370 DWARN("%s:%d sendto failed", __func__, __LINE__);
371 #endif
372 return -1;
373 }
374
375 return 0;
376 }
377
378
379 uint64_t
380 perfuse_next_unique(pu)
381 struct puffs_usermount *pu;
382 {
383 struct perfuse_state *ps;
384
385 ps = puffs_getspecific(pu);
386
387 return ps->ps_unique++;
388 }
389
390 struct puffs_usermount *
391 perfuse_init(pc, pmi)
392 struct perfuse_callbacks *pc;
393 struct perfuse_mount_info *pmi;
394 {
395 struct perfuse_state *ps;
396 struct puffs_usermount *pu;
397 struct puffs_ops *pops;
398 const char *source = _PATH_PUFFS;
399 char *fstype;
400 unsigned int puffs_flags;
401 struct puffs_node *pn_root;
402 struct puffs_pathobj *po_root;
403 struct rlimit rl;
404
405 /*
406 * perfused can grow quite large, let assume there's enough ram ...
407 */
408 if (getrlimit(RLIMIT_DATA, &rl) < 0) {
409 DERR(EX_OSERR, "%s: getrlimit failed: %s", __func__,
410 strerror(errno));
411 } else {
412 rl.rlim_cur = rl.rlim_max;
413 if (setrlimit(RLIMIT_DATA, &rl) < 0) {
414 DERR(EX_OSERR, "%s: setrlimit failed: %s", __func__,
415 strerror(errno));
416 }
417 }
418
419
420 ps = init_state();
421 ps->ps_owner_uid = pmi->pmi_uid;
422
423 if (pmi->pmi_source) {
424 if ((ps->ps_source = strdup(pmi->pmi_source)) == NULL)
425 DERR(EX_OSERR, "%s: strdup failed", __func__);
426
427 source = ps->ps_source;
428 }
429
430 if (pmi->pmi_filesystemtype) {
431 size_t len;
432
433 ps->ps_filesystemtype = strdup(pmi->pmi_filesystemtype);
434 if (ps->ps_filesystemtype == NULL)
435 DERR(EX_OSERR, "%s: strdup failed", __func__);
436
437 len = sizeof("perfuse|") + strlen(ps->ps_filesystemtype) + 1;
438 if ((fstype = malloc(len)) == NULL)
439 DERR(EX_OSERR, "%s: malloc failed", __func__);
440
441 (void)sprintf(fstype, "perfuse|%s", ps->ps_filesystemtype);
442 } else {
443 if ((fstype = strdup("perfuse")) == NULL)
444 DERR(EX_OSERR, "%s: strdup failed", __func__);
445 }
446
447 if ((ps->ps_target = strdup(pmi->pmi_target)) == NULL)
448 DERR(EX_OSERR, "%s: strdup failed", __func__);
449
450 ps->ps_mountflags = pmi->pmi_mountflags;
451
452 /*
453 * Some options are forbidden for non root users
454 */
455 if (ps->ps_owner_uid != 0)
456 ps->ps_mountflags |= MNT_NOSUID|MNT_NODEV;
457
458 PUFFSOP_INIT(pops);
459 PUFFSOP_SET(pops, perfuse, fs, unmount);
460 PUFFSOP_SET(pops, perfuse, fs, statvfs);
461 PUFFSOP_SET(pops, perfuse, fs, sync);
462 PUFFSOP_SET(pops, perfuse, node, lookup);
463 PUFFSOP_SET(pops, perfuse, node, create);
464 PUFFSOP_SET(pops, perfuse, node, mknod);
465 PUFFSOP_SET(pops, perfuse, node, open);
466 PUFFSOP_SET(pops, perfuse, node, close);
467 PUFFSOP_SET(pops, perfuse, node, access);
468 PUFFSOP_SET(pops, perfuse, node, getattr);
469 PUFFSOP_SET(pops, perfuse, node, setattr);
470 PUFFSOP_SET(pops, perfuse, node, poll);
471 #if 0
472 PUFFSOP_SET(pops, perfuse, node, mmap);
473 #endif
474 PUFFSOP_SET(pops, perfuse, node, fsync);
475 PUFFSOP_SET(pops, perfuse, node, seek);
476 PUFFSOP_SET(pops, perfuse, node, remove);
477 PUFFSOP_SET(pops, perfuse, node, link);
478 PUFFSOP_SET(pops, perfuse, node, rename);
479 PUFFSOP_SET(pops, perfuse, node, mkdir);
480 PUFFSOP_SET(pops, perfuse, node, rmdir);
481 PUFFSOP_SET(pops, perfuse, node, symlink);
482 PUFFSOP_SET(pops, perfuse, node, readdir);
483 PUFFSOP_SET(pops, perfuse, node, readlink);
484 PUFFSOP_SET(pops, perfuse, node, reclaim);
485 PUFFSOP_SET(pops, perfuse, node, inactive);
486 PUFFSOP_SET(pops, perfuse, node, print);
487 PUFFSOP_SET(pops, perfuse, node, advlock);
488 PUFFSOP_SET(pops, perfuse, node, read);
489 PUFFSOP_SET(pops, perfuse, node, write);
490 #ifdef PUFFS_EXTNAMELEN
491 PUFFSOP_SET(pops, perfuse, node, getextattr);
492 PUFFSOP_SET(pops, perfuse, node, setextattr);
493 PUFFSOP_SET(pops, perfuse, node, listextattr);
494 PUFFSOP_SET(pops, perfuse, node, deleteextattr);
495 #endif /* PUFFS_EXTNAMELEN */
496
497 /*
498 * We used to have PUFFS_KFLAG_WTCACHE here, which uses the
499 * page cache (highly desirable to get mmap(2)), but still sends
500 * all writes to the filesystem. In fact it does not send the
501 * data written, but the pages that contain it.
502 *
503 * There is a nasty bug hidden somewhere, possibly in libpuffs'
504 * VOP_FSYNC, which sends an asynchronous PUFFS_SETATTR that
505 * update file size. When writes are in progress, it will cause
506 * the file to be truncated and we get a zero-filled chunk at the
507 * beginning of a page. Removing PUFFS_KFLAG_WTCACHE fixes that
508 * problem.
509 *
510 * The other consequences are that changes will not be propagated
511 * immediatly to the filesystem, and we get a huge performance gain
512 * because much less requests are sent. A test case for the above
513 * mentioned bug got its execution time slashed by factor 50.
514 *
515 * PUFFS_KFLAG_NOCACHE_NAME is required so that we can see changes
516 * done by other machines in networked filesystems.
517 */
518 puffs_flags = PUFFS_KFLAG_NOCACHE_NAME;
519
520 if (perfuse_diagflags & PDF_PUFFS)
521 puffs_flags |= PUFFS_FLAG_OPDUMP;
522
523 if ((pu = puffs_init(pops, source, fstype, ps, puffs_flags)) == NULL)
524 DERR(EX_OSERR, "%s: puffs_init failed", __func__);
525
526 ps->ps_pu = pu;
527
528 /*
529 * Setup filesystem root
530 */
531 pn_root = perfuse_new_pn(pu, "", NULL);
532 PERFUSE_NODE_DATA(pn_root)->pnd_nodeid = FUSE_ROOT_ID;
533 PERFUSE_NODE_DATA(pn_root)->pnd_parent = pn_root;
534 puffs_setroot(pu, pn_root);
535 ps->ps_fsid = pn_root->pn_va.va_fsid;
536
537 po_root = puffs_getrootpathobj(pu);
538 if ((po_root->po_path = strdup("/")) == NULL)
539 DERRX(EX_OSERR, "perfuse_mount_start() failed");
540
541 po_root->po_len = 1;
542 puffs_path_buildhash(pu, po_root);
543
544 puffs_vattr_null(&pn_root->pn_va);
545 pn_root->pn_va.va_type = VDIR;
546 pn_root->pn_va.va_mode = 0755;
547 pn_root->pn_va.va_fileid = FUSE_ROOT_ID;
548
549 ps->ps_root = pn_root;
550
551 /*
552 * Callbacks
553 */
554 ps->ps_new_msg = pc->pc_new_msg;
555 ps->ps_xchg_msg = pc->pc_xchg_msg;
556 ps->ps_destroy_msg = pc->pc_destroy_msg;
557 ps->ps_get_inhdr = pc->pc_get_inhdr;
558 ps->ps_get_inpayload = pc->pc_get_inpayload;
559 ps->ps_get_outhdr = pc->pc_get_outhdr;
560 ps->ps_get_outpayload = pc->pc_get_outpayload;
561 ps->ps_umount = pc->pc_umount;
562
563 return pu;
564 }
565
566 void
567 perfuse_setspecific(pu, priv)
568 struct puffs_usermount *pu;
569 void *priv;
570 {
571 struct perfuse_state *ps;
572
573 ps = puffs_getspecific(pu);
574 ps->ps_private = priv;
575
576 return;
577 }
578
579 void *
580 perfuse_getspecific(pu)
581 struct puffs_usermount *pu;
582 {
583 struct perfuse_state *ps;
584
585 ps = puffs_getspecific(pu);
586
587 return ps->ps_private;
588 }
589
590 int
591 perfuse_inloop(pu)
592 struct puffs_usermount *pu;
593 {
594 struct perfuse_state *ps;
595
596 ps = puffs_getspecific(pu);
597
598 return ps->ps_flags & PS_INLOOP;
599 }
600
601 int
602 perfuse_mainloop(pu)
603 struct puffs_usermount *pu;
604 {
605 struct perfuse_state *ps;
606
607 ps = puffs_getspecific(pu);
608
609 ps->ps_flags |= PS_INLOOP;
610 if (puffs_mainloop(ps->ps_pu) != 0) {
611 DERR(EX_OSERR, "%s: failed", __func__);
612 return -1;
613 }
614
615 /*
616 * Normal exit after unmount
617 */
618 return 0;
619 }
620
621 /* ARGSUSED0 */
622 uint64_t
623 perfuse_get_nodeid(pu, opc)
624 struct puffs_usermount *pu;
625 puffs_cookie_t opc;
626 {
627 return PERFUSE_NODE_DATA(opc)->pnd_nodeid;
628 }
629
630 int
631 perfuse_unmount(pu)
632 struct puffs_usermount *pu;
633 {
634 struct perfuse_state *ps;
635
636 ps = puffs_getspecific(pu);
637
638 return unmount(ps->ps_target, MNT_FORCE);
639 }
640