perfuse.c revision 1.24 1 /* $NetBSD: perfuse.c,v 1.24 2011/12/28 17:33:53 manu Exp $ */
2
3 /*-
4 * Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <errno.h>
34 #include <puffs.h>
35 #include <sys/types.h>
36 #include <sys/mman.h>
37 #include <sys/resource.h>
38 #include <sys/socket.h>
39 #include <sys/extattr.h>
40 #include <sys/un.h>
41 #include <machine/vmparam.h>
42
43 #define LIBPERFUSE
44 #include "perfuse.h"
45 #include "perfuse_if.h"
46 #include "perfuse_priv.h"
47
/* Process environment, passed on to execve() when spawning perfused. */
extern char **environ;

/*
 * Diagnostic flag bitmask. Besides the DPRINTF/DERR/DWARN macros, this
 * file tests it directly (PDF_MISC, PDF_PUFFS) to enable debug paths.
 */
int perfuse_diagflags = 0;

/* File-local helpers, defined below. */
static struct perfuse_state *init_state(void);
static int get_fd(const char *);
53
54
55 static struct perfuse_state *
56 init_state(void)
57 {
58 struct perfuse_state *ps;
59 char opts[1024];
60
61 if ((ps = malloc(sizeof(*ps))) == NULL)
62 DERR(EX_OSERR, "%s: malloc failed", __func__);
63
64 (void)memset(ps, 0, sizeof(*ps));
65 ps->ps_max_write = UINT_MAX;
66 ps->ps_max_readahead = UINT_MAX;
67 TAILQ_INIT(&ps->ps_trace);
68
69 /*
70 * Most of the time, access() is broken because the filesystem
71 * performs the check with root privileges. glusterfs will do that
72 * if the Linux-specific setfsuid() is missing, for instance.
73 */
74 ps->ps_flags |= PS_NO_ACCESS;
75
76 /*
77 * This is a temporary way to toggle access and creat usage.
78 * It would be nice if that could be provided as mount options,
79 * but that will not be obvious to do.
80 */
81 if (getenv_r("PERFUSE_OPTIONS", opts, sizeof(opts)) != -1) {
82 char *optname;
83 char *last;
84
85 for ((optname = strtok_r(opts, ",", &last));
86 optname != NULL;
87 (optname = strtok_r(NULL, ",", &last))) {
88 if (strcmp(optname, "enable_access") == 0)
89 ps->ps_flags &= ~PS_NO_ACCESS;
90
91 if (strcmp(optname, "disable_access") == 0)
92 ps->ps_flags |= PS_NO_ACCESS;
93
94 if (strcmp(optname, "enable_creat") == 0)
95 ps->ps_flags &= ~PS_NO_CREAT;
96
97 if (strcmp(optname, "disable_creat") == 0)
98 ps->ps_flags |= PS_NO_CREAT;
99 }
100 }
101
102
103 return ps;
104 }
105
106
/*
 * Extract the file descriptor number from a comma-separated mount
 * option string such as "rw,fd=5,user_id=0".
 *
 * The first option starting with "fd=" decides the result. Its value
 * must be a non-negative decimal integer that fits in an int and is
 * followed directly by ',' or the end of the string.
 *
 * Returns the descriptor on success. Returns -1 with errno set to EINVAL
 * when data is NULL, when no "fd=" option exists, or when its value is
 * malformed. The input string is scanned in place; nothing is allocated.
 */
static int
get_fd(const char *data)
{
	static const char fdopt[] = "fd=";
	const size_t fdoptlen = sizeof(fdopt) - 1;
	const char *opt;

	if (data == NULL) {
		errno = EINVAL;
		return -1;
	}

	opt = data;
	while (*opt != '\0') {
		/* Length of the current option, up to the next ',' or NUL */
		size_t optlen = strcspn(opt, ",");

		if (optlen >= fdoptlen &&
		    strncmp(opt, fdopt, fdoptlen) == 0) {
			const char *num = opt + fdoptlen;
			char *end;
			long val;

			errno = 0;
			val = strtol(num, &end, 10);

			/*
			 * Reject empty values, trailing garbage and
			 * anything that cannot be a descriptor, instead
			 * of silently turning it into fd 0.
			 */
			if (end == num ||
			    (*end != ',' && *end != '\0') ||
			    errno == ERANGE || val < 0 || val > INT_MAX)
				break;

			return (int)val;
		}

		opt += optlen;
		if (*opt == ',')
			opt++;
	}

	/*
	 * No usable file descriptor found
	 */
	errno = EINVAL;
	return -1;
}
139
140 int
141 perfuse_open(path, flags, mode)
142 const char *path;
143 int flags;
144 mode_t mode;
145 {
146 int sv[2];
147 struct sockaddr_un sun;
148 struct sockaddr *sa;
149 char progname[] = _PATH_PERFUSED;
150 char minus_i[] = "-i";
151 char fdstr[16];
152 char *const argv[] = { progname, minus_i, fdstr, NULL};
153 uint32_t opt;
154 uint32_t optlen;
155 int sock_type = SOCK_SEQPACKET;
156
157 if (strcmp(path, _PATH_FUSE) != 0)
158 return open(path, flags, mode);
159
160 /*
161 * Try SOCK_SEQPACKET then SOCK_DGRAM if unavailable
162 */
163 if ((sv[0] = socket(PF_LOCAL, SOCK_SEQPACKET, 0)) == -1) {
164 sock_type = SOCK_DGRAM;
165 DWARNX("SEQPACKET local sockets unavailable, using less "
166 "reliable DGRAM sockets. Expect file operation hangs.");
167
168 if ((sv[0] = socket(PF_LOCAL, SOCK_DGRAM, 0)) == -1) {
169 #ifdef PERFUSE_DEBUG
170 DWARN("%s: %d socket failed", __func__, __LINE__);
171 #endif
172 return -1;
173 }
174 }
175
176 /*
177 * Set a buffer lentgh large enough so that any FUSE packet
178 * will fit.
179 */
180 opt = (uint32_t)FUSE_BUFSIZE;
181 optlen = sizeof(opt);
182 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
183 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
184
185 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
186 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
187
188 sa = (struct sockaddr *)(void *)&sun;
189 sun.sun_len = sizeof(sun);
190 sun.sun_family = AF_LOCAL;
191 (void)strcpy(sun.sun_path, path);
192
193 if (connect(sv[0], sa, (socklen_t)sun.sun_len) == 0)
194 return sv[0];
195
196 /*
197 * Attempt to run perfused on our own
198 * if it does not run yet; In that case
199 * we will talk using a socketpair
200 * instead of /dev/fuse.
201 */
202 if (socketpair(PF_LOCAL, sock_type, 0, sv) != 0) {
203 DWARN("%s:%d: socketpair failed", __func__, __LINE__);
204 return -1;
205 }
206
207 /*
208 * Set a buffer lentgh large enough so that any FUSE packet
209 * will fit.
210 */
211 opt = (uint32_t)(4 * FUSE_BUFSIZE);
212 optlen = sizeof(opt);
213 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
214 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
215
216 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
217 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
218
219 if (setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
220 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
221
222 if (setsockopt(sv[1], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
223 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
224
225 /*
226 * Request peer credentials. This musr be done before first
227 * frame is sent.
228 */
229 opt = 1;
230 optlen = sizeof(opt);
231 if (setsockopt(sv[1], 0, LOCAL_CREDS, &opt, optlen) != 0)
232 DWARN("%s: setsockopt LOCAL_CREDS failed", __func__);
233
234 (void)sprintf(fdstr, "%d", sv[1]);
235
236 switch(fork()) {
237 case -1:
238 #ifdef PERFUSE_DEBUG
239 DWARN("%s:%d: fork failed", __func__, __LINE__);
240 #endif
241 return -1;
242 /* NOTREACHED */
243 break;
244 case 0:
245 (void)execve(argv[0], argv, environ);
246 #ifdef PERFUSE_DEBUG
247 DWARN("%s:%d: execve failed", __func__, __LINE__);
248 #endif
249 return -1;
250 /* NOTREACHED */
251 break;
252 default:
253 break;
254 }
255
256 return sv[0];
257 }
258
259 int
260 perfuse_mount(source, target, filesystemtype, mountflags, data)
261 const char *source;
262 const char *target;
263 const char *filesystemtype;
264 long mountflags;
265 const void *data;
266 {
267 int s;
268 size_t len;
269 struct perfuse_mount_out *pmo;
270 struct sockaddr_storage ss;
271 struct sockaddr_un *sun;
272 struct sockaddr *sa;
273 socklen_t sa_len;
274 size_t sock_len;
275 char *frame;
276 char *cp;
277
278 #ifdef PERFUSE_DEBUG
279 if (perfuse_diagflags & PDF_MISC)
280 DPRINTF("%s(\"%s\", \"%s\", \"%s\", 0x%lx, \"%s\")\n",
281 __func__, source, target, filesystemtype,
282 mountflags, (const char *)data);
283 #endif
284
285 if ((s = get_fd(data)) == -1)
286 return -1;
287
288 /*
289 * If we are connected to /dev/fuse, we need a second
290 * socket to get replies from perfused.
291 * XXX This socket is not removed at exit time yet
292 */
293 sock_len = 0;
294 sa = (struct sockaddr *)(void *)&ss;
295 sun = (struct sockaddr_un *)(void *)&ss;
296 sa_len = sizeof(ss);
297 if ((getpeername(s, sa, &sa_len) == 0) &&
298 (sa->sa_family = AF_LOCAL) &&
299 (strcmp(sun->sun_path, _PATH_FUSE) == 0)) {
300
301 sun->sun_len = sizeof(*sun);
302 sun->sun_family = AF_LOCAL;
303 (void)sprintf(sun->sun_path, "%s/%s-%d",
304 _PATH_TMP, getprogname(), getpid());
305
306 if (bind(s, sa, (socklen_t)sa->sa_len) != 0)
307 DERR(EX_OSERR, "%s:%d bind to \"%s\" failed",
308 __func__, __LINE__, sun->sun_path);
309
310 sock_len = strlen(sun->sun_path) + 1;
311 }
312
313 len = sizeof(*pmo);
314 len += source ? (uint32_t)strlen(source) + 1 : 0;
315 len += target ? (uint32_t)strlen(target) + 1 : 0;
316 len += filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
317 len += data ? (uint32_t)strlen(data) + 1 : 0;
318 len += sock_len;
319
320 if ((frame = malloc(len)) == NULL) {
321 #ifdef PERFUSE_DEBUG
322 if (perfuse_diagflags & PDF_MISC)
323 DWARN("%s:%d malloc failed", __func__, __LINE__);
324 #endif
325 return -1;
326 }
327
328 pmo = (struct perfuse_mount_out *)(void *)frame;
329 pmo->pmo_len = (uint32_t)len;
330 pmo->pmo_error = 0;
331 pmo->pmo_unique = (uint64_t)-1;
332 (void)strcpy(pmo->pmo_magic, PERFUSE_MOUNT_MAGIC);
333
334 pmo->pmo_source_len = source ? (uint32_t)strlen(source) + 1 : 0;
335 pmo->pmo_target_len = target ? (uint32_t)strlen(target) + 1: 0;
336 pmo->pmo_filesystemtype_len =
337 filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
338 pmo->pmo_mountflags = (uint32_t)mountflags;
339 pmo->pmo_data_len = data ? (uint32_t)strlen(data) + 1 : 0;
340 pmo->pmo_sock_len = (uint32_t)sock_len;
341
342 cp = (char *)(void *)(pmo + 1);
343
344 if (source) {
345 (void)strcpy(cp, source);
346 cp += pmo->pmo_source_len;
347 }
348
349 if (target) {
350 (void)strcpy(cp, target);
351 cp += pmo->pmo_target_len;
352 }
353
354 if (filesystemtype) {
355 (void)strcpy(cp, filesystemtype);
356 cp += pmo->pmo_filesystemtype_len;
357 }
358
359 if (data) {
360 (void)strcpy(cp, data);
361 cp += pmo->pmo_data_len;
362 }
363
364 if (sock_len != 0) {
365 (void)strcpy(cp, sun->sun_path);
366 cp += pmo->pmo_sock_len;
367 }
368
369 if (send(s, frame, len, MSG_NOSIGNAL) != (ssize_t)len) {
370 #ifdef PERFUSE_DEBUG
371 DWARN("%s:%d sendto failed", __func__, __LINE__);
372 #endif
373 return -1;
374 }
375
376 return 0;
377 }
378
379
380 uint64_t
381 perfuse_next_unique(pu)
382 struct puffs_usermount *pu;
383 {
384 struct perfuse_state *ps;
385
386 ps = puffs_getspecific(pu);
387
388 return ps->ps_unique++;
389 }
390
391 struct puffs_usermount *
392 perfuse_init(pc, pmi)
393 struct perfuse_callbacks *pc;
394 struct perfuse_mount_info *pmi;
395 {
396 struct perfuse_state *ps;
397 struct puffs_usermount *pu;
398 struct puffs_ops *pops;
399 const char *source = _PATH_PUFFS;
400 char *fstype;
401 unsigned int puffs_flags;
402 struct puffs_node *pn_root;
403 struct puffs_pathobj *po_root;
404 struct rlimit rl;
405
406 /*
407 * perfused can grow quite large, let assume there's enough ram ...
408 */
409 if (getrlimit(RLIMIT_DATA, &rl) < 0) {
410 DERR(EX_OSERR, "%s: getrlimit failed: %s", __func__,
411 strerror(errno));
412 } else {
413 rl.rlim_cur = rl.rlim_max;
414 if (setrlimit(RLIMIT_DATA, &rl) < 0) {
415 DERR(EX_OSERR, "%s: setrlimit failed: %s", __func__,
416 strerror(errno));
417 }
418 }
419
420
421 ps = init_state();
422 ps->ps_owner_uid = pmi->pmi_uid;
423
424 if (pmi->pmi_source) {
425 if ((ps->ps_source = strdup(pmi->pmi_source)) == NULL)
426 DERR(EX_OSERR, "%s: strdup failed", __func__);
427
428 source = ps->ps_source;
429 }
430
431 if (pmi->pmi_filesystemtype) {
432 size_t len;
433
434 ps->ps_filesystemtype = strdup(pmi->pmi_filesystemtype);
435 if (ps->ps_filesystemtype == NULL)
436 DERR(EX_OSERR, "%s: strdup failed", __func__);
437
438 len = sizeof("perfuse|") + strlen(ps->ps_filesystemtype) + 1;
439 if ((fstype = malloc(len)) == NULL)
440 DERR(EX_OSERR, "%s: malloc failed", __func__);
441
442 (void)sprintf(fstype, "perfuse|%s", ps->ps_filesystemtype);
443 } else {
444 if ((fstype = strdup("perfuse")) == NULL)
445 DERR(EX_OSERR, "%s: strdup failed", __func__);
446 }
447
448 if ((ps->ps_target = strdup(pmi->pmi_target)) == NULL)
449 DERR(EX_OSERR, "%s: strdup failed", __func__);
450
451 ps->ps_mountflags = pmi->pmi_mountflags;
452
453 /*
454 * Some options are forbidden for non root users
455 */
456 if (ps->ps_owner_uid != 0)
457 ps->ps_mountflags |= MNT_NOSUID|MNT_NODEV;
458
459 PUFFSOP_INIT(pops);
460 PUFFSOP_SET(pops, perfuse, fs, unmount);
461 PUFFSOP_SET(pops, perfuse, fs, statvfs);
462 PUFFSOP_SET(pops, perfuse, fs, sync);
463 PUFFSOP_SET(pops, perfuse, node, lookup);
464 PUFFSOP_SET(pops, perfuse, node, create);
465 PUFFSOP_SET(pops, perfuse, node, mknod);
466 PUFFSOP_SET(pops, perfuse, node, open);
467 PUFFSOP_SET(pops, perfuse, node, close);
468 PUFFSOP_SET(pops, perfuse, node, access);
469 PUFFSOP_SET(pops, perfuse, node, getattr);
470 PUFFSOP_SET(pops, perfuse, node, setattr);
471 PUFFSOP_SET(pops, perfuse, node, poll);
472 #if 0
473 PUFFSOP_SET(pops, perfuse, node, mmap);
474 #endif
475 PUFFSOP_SET(pops, perfuse, node, fsync);
476 PUFFSOP_SET(pops, perfuse, node, seek);
477 PUFFSOP_SET(pops, perfuse, node, remove);
478 PUFFSOP_SET(pops, perfuse, node, link);
479 PUFFSOP_SET(pops, perfuse, node, rename);
480 PUFFSOP_SET(pops, perfuse, node, mkdir);
481 PUFFSOP_SET(pops, perfuse, node, rmdir);
482 PUFFSOP_SET(pops, perfuse, node, symlink);
483 PUFFSOP_SET(pops, perfuse, node, readdir);
484 PUFFSOP_SET(pops, perfuse, node, readlink);
485 PUFFSOP_SET(pops, perfuse, node, reclaim);
486 PUFFSOP_SET(pops, perfuse, node, inactive);
487 PUFFSOP_SET(pops, perfuse, node, print);
488 PUFFSOP_SET(pops, perfuse, node, advlock);
489 PUFFSOP_SET(pops, perfuse, node, read);
490 PUFFSOP_SET(pops, perfuse, node, write);
491 #ifdef PUFFS_EXTNAMELEN
492 PUFFSOP_SET(pops, perfuse, node, getextattr);
493 PUFFSOP_SET(pops, perfuse, node, setextattr);
494 PUFFSOP_SET(pops, perfuse, node, listextattr);
495 PUFFSOP_SET(pops, perfuse, node, deleteextattr);
496 #endif /* PUFFS_EXTNAMELEN */
497
498 /*
499 * We used to have PUFFS_KFLAG_WTCACHE here, which uses the
500 * page cache (highly desirable to get mmap(2)), but still sends
501 * all writes to the filesystem. In fact it does not send the
502 * data written, but the pages that contain it.
503 *
504 * There is a nasty bug hidden somewhere, possibly in libpuffs'
505 * VOP_FSYNC, which sends an asynchronous PUFFS_SETATTR that
506 * update file size. When writes are in progress, it will cause
507 * the file to be truncated and we get a zero-filled chunk at the
508 * beginning of a page. Removing PUFFS_KFLAG_WTCACHE fixes that
509 * problem.
510 *
511 * The other consequences are that changes will not be propagated
512 * immediatly to the filesystem, and we get a huge performance gain
513 * because much less requests are sent. A test case for the above
514 * mentioned bug got its execution time slashed by factor 50.
515 *
516 * PUFFS_KFLAG_NOCACHE_NAME is required so that we can see changes
517 * done by other machines in networked filesystems.
518 */
519 puffs_flags = PUFFS_KFLAG_NOCACHE_NAME;
520
521 if (perfuse_diagflags & PDF_PUFFS)
522 puffs_flags |= PUFFS_FLAG_OPDUMP;
523
524 if ((pu = puffs_init(pops, source, fstype, ps, puffs_flags)) == NULL)
525 DERR(EX_OSERR, "%s: puffs_init failed", __func__);
526
527 ps->ps_pu = pu;
528
529 /*
530 * Setup filesystem root
531 */
532 pn_root = perfuse_new_pn(pu, "", NULL);
533 PERFUSE_NODE_DATA(pn_root)->pnd_nodeid = FUSE_ROOT_ID;
534 PERFUSE_NODE_DATA(pn_root)->pnd_parent = pn_root;
535 puffs_setroot(pu, pn_root);
536 ps->ps_fsid = pn_root->pn_va.va_fsid;
537
538 po_root = puffs_getrootpathobj(pu);
539 if ((po_root->po_path = strdup("/")) == NULL)
540 DERRX(EX_OSERR, "perfuse_mount_start() failed");
541
542 po_root->po_len = 1;
543 puffs_path_buildhash(pu, po_root);
544
545 puffs_vattr_null(&pn_root->pn_va);
546 pn_root->pn_va.va_type = VDIR;
547 pn_root->pn_va.va_mode = 0755;
548 pn_root->pn_va.va_fileid = FUSE_ROOT_ID;
549
550 ps->ps_root = pn_root;
551
552 /*
553 * Callbacks
554 */
555 ps->ps_new_msg = pc->pc_new_msg;
556 ps->ps_xchg_msg = pc->pc_xchg_msg;
557 ps->ps_destroy_msg = pc->pc_destroy_msg;
558 ps->ps_get_inhdr = pc->pc_get_inhdr;
559 ps->ps_get_inpayload = pc->pc_get_inpayload;
560 ps->ps_get_outhdr = pc->pc_get_outhdr;
561 ps->ps_get_outpayload = pc->pc_get_outpayload;
562 ps->ps_umount = pc->pc_umount;
563
564 return pu;
565 }
566
567 void
568 perfuse_setspecific(pu, priv)
569 struct puffs_usermount *pu;
570 void *priv;
571 {
572 struct perfuse_state *ps;
573
574 ps = puffs_getspecific(pu);
575 ps->ps_private = priv;
576
577 return;
578 }
579
580 void *
581 perfuse_getspecific(pu)
582 struct puffs_usermount *pu;
583 {
584 struct perfuse_state *ps;
585
586 ps = puffs_getspecific(pu);
587
588 return ps->ps_private;
589 }
590
591 int
592 perfuse_inloop(pu)
593 struct puffs_usermount *pu;
594 {
595 struct perfuse_state *ps;
596
597 ps = puffs_getspecific(pu);
598
599 return ps->ps_flags & PS_INLOOP;
600 }
601
602 int
603 perfuse_mainloop(pu)
604 struct puffs_usermount *pu;
605 {
606 struct perfuse_state *ps;
607
608 ps = puffs_getspecific(pu);
609
610 ps->ps_flags |= PS_INLOOP;
611 if (puffs_mainloop(ps->ps_pu) != 0) {
612 DERR(EX_OSERR, "%s: failed", __func__);
613 return -1;
614 }
615
616 /*
617 * Normal exit after unmount
618 */
619 return 0;
620 }
621
622 /* ARGSUSED0 */
623 uint64_t
624 perfuse_get_nodeid(pu, opc)
625 struct puffs_usermount *pu;
626 puffs_cookie_t opc;
627 {
628 return PERFUSE_NODE_DATA(opc)->pnd_nodeid;
629 }
630
631 int
632 perfuse_unmount(pu)
633 struct puffs_usermount *pu;
634 {
635 struct perfuse_state *ps;
636
637 ps = puffs_getspecific(pu);
638
639 return unmount(ps->ps_target, MNT_FORCE);
640 }
641