perfuse.c revision 1.26 1 /* $NetBSD: perfuse.c,v 1.26 2012/03/21 10:10:36 matt Exp $ */
2
3 /*-
4 * Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <errno.h>
34 #include <puffs.h>
35 #include <sys/types.h>
36 #include <sys/mman.h>
37 #include <sys/resource.h>
38 #include <sys/socket.h>
39 #include <sys/extattr.h>
40 #include <sys/un.h>
41 #include <machine/vmparam.h>
42
43 #define LIBPERFUSE
44 #include "perfuse.h"
45 #include "perfuse_if.h"
46 #include "perfuse_priv.h"
47
48 int perfuse_diagflags = 0; /* diagnostic flag bits, tested against PDF_* masks; also used by DPRINTF/DERR/DWARN */
49 extern char **environ; /* process environment, handed to execve() in perfuse_open() */
50
51 static struct perfuse_state *init_state(void); /* allocate a perfuse_state and apply defaults */
52 static int get_fd(const char *); /* extract the "fd=" value from a comma-separated option string */
53
54
55 static struct perfuse_state *
56 init_state(void)
57 {
58 struct perfuse_state *ps;
59 char opts[1024];
60
61 if ((ps = malloc(sizeof(*ps))) == NULL)
62 DERR(EX_OSERR, "%s: malloc failed", __func__);
63
64 (void)memset(ps, 0, sizeof(*ps));
65 ps->ps_max_write = UINT_MAX;
66 ps->ps_max_readahead = UINT_MAX;
67 TAILQ_INIT(&ps->ps_trace);
68
69 /*
70 * Most of the time, access() is broken because the filesystem
71 * performs the check with root privileges. glusterfs will do that
72 * if the Linux-specific setfsuid() is missing, for instance.
73 */
74 ps->ps_flags |= PS_NO_ACCESS;
75
76 /*
77 * This is a temporary way to toggle access and creat usage.
78 * It would be nice if that could be provided as mount options,
79 * but that will not be obvious to do.
80 */
81 if (getenv_r("PERFUSE_OPTIONS", opts, sizeof(opts)) != -1) {
82 char *optname;
83 char *last;
84
85 for ((optname = strtok_r(opts, ",", &last));
86 optname != NULL;
87 (optname = strtok_r(NULL, ",", &last))) {
88 if (strcmp(optname, "enable_access") == 0)
89 ps->ps_flags &= ~PS_NO_ACCESS;
90
91 if (strcmp(optname, "disable_access") == 0)
92 ps->ps_flags |= PS_NO_ACCESS;
93
94 if (strcmp(optname, "enable_creat") == 0)
95 ps->ps_flags &= ~PS_NO_CREAT;
96
97 if (strcmp(optname, "disable_creat") == 0)
98 ps->ps_flags |= PS_NO_CREAT;
99 }
100 }
101
102
103 return ps;
104 }
105
106
/*
 * Extract the file descriptor number from a comma-separated mount option
 * string such as "fd=3,rootmode=40000,user_id=0".
 *
 * The first option starting with "fd=" decides the result.  Its value
 * must be a non-empty run of decimal digits that fits in an int and is
 * followed by ',' or the end of the string.
 *
 * Returns the descriptor on success (errno is left untouched).
 * Returns -1 with errno set to EINVAL if data is NULL, no "fd=" option
 * is present, or its value is malformed.
 */
static int
get_fd(const char *data)
{
	static const char fdopt[] = "fd=";
	const size_t fdopt_len = sizeof(fdopt) - 1;
	const char *opt;
	const char *next;

	if (data == NULL) {
		errno = EINVAL;
		return -1;
	}

	for (opt = data; opt != NULL; opt = next) {
		const char *comma = strchr(opt, ',');
		const char *num;
		char *end;
		long val;
		int saved_errno;

		next = (comma != NULL) ? comma + 1 : NULL;

		/* "fd=" holds no comma, so a match cannot span two options */
		if (strncmp(opt, fdopt, fdopt_len) != 0)
			continue;

		num = opt + fdopt_len;

		/*
		 * strtol() would accept leading blanks and a sign, and
		 * atoi() silently turned garbage into descriptor 0;
		 * insist on a leading digit instead.
		 */
		if (*num < '0' || *num > '9')
			break;

		saved_errno = errno;
		errno = 0;
		val = strtol(num, &end, 10);
		if (errno != 0 || val > INT_MAX ||
		    (*end != '\0' && *end != ','))
			break;

		errno = saved_errno;
		return (int)val;
	}

	/*
	 * No usable file descriptor found
	 */
	errno = EINVAL;
	return -1;
}
138
139 int
140 perfuse_open(const char *path, int flags, mode_t mode)
141 {
142 int sv[2];
143 struct sockaddr_un sun;
144 struct sockaddr *sa;
145 char progname[] = _PATH_PERFUSED;
146 char minus_i[] = "-i";
147 char fdstr[16];
148 char *const argv[] = { progname, minus_i, fdstr, NULL};
149 uint32_t opt;
150 uint32_t optlen;
151 int sock_type = SOCK_SEQPACKET;
152
153 if (strcmp(path, _PATH_FUSE) != 0)
154 return open(path, flags, mode);
155
156 /*
157 * Try SOCK_SEQPACKET then SOCK_DGRAM if unavailable
158 */
159 if ((sv[0] = socket(PF_LOCAL, SOCK_SEQPACKET, 0)) == -1) {
160 sock_type = SOCK_DGRAM;
161 DWARNX("SEQPACKET local sockets unavailable, using less "
162 "reliable DGRAM sockets. Expect file operation hangs.");
163
164 if ((sv[0] = socket(PF_LOCAL, SOCK_DGRAM, 0)) == -1) {
165 #ifdef PERFUSE_DEBUG
166 DWARN("%s: %d socket failed", __func__, __LINE__);
167 #endif
168 return -1;
169 }
170 }
171
172 /*
173 * Set a buffer lentgh large enough so that any FUSE packet
174 * will fit.
175 */
176 opt = (uint32_t)FUSE_BUFSIZE;
177 optlen = sizeof(opt);
178 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
179 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
180
181 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
182 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
183
184 sa = (struct sockaddr *)(void *)&sun;
185 sun.sun_len = sizeof(sun);
186 sun.sun_family = AF_LOCAL;
187 (void)strcpy(sun.sun_path, path);
188
189 if (connect(sv[0], sa, (socklen_t)sun.sun_len) == 0)
190 return sv[0];
191
192 /*
193 * Attempt to run perfused on our own
194 * if it does not run yet; In that case
195 * we will talk using a socketpair
196 * instead of /dev/fuse.
197 */
198 if (socketpair(PF_LOCAL, sock_type, 0, sv) != 0) {
199 DWARN("%s:%d: socketpair failed", __func__, __LINE__);
200 return -1;
201 }
202
203 /*
204 * Set a buffer lentgh large enough so that any FUSE packet
205 * will fit.
206 */
207 opt = (uint32_t)(4 * FUSE_BUFSIZE);
208 optlen = sizeof(opt);
209 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
210 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
211
212 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
213 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
214
215 if (setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
216 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
217
218 if (setsockopt(sv[1], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
219 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
220
221 /*
222 * Request peer credentials. This musr be done before first
223 * frame is sent.
224 */
225 opt = 1;
226 optlen = sizeof(opt);
227 if (setsockopt(sv[1], 0, LOCAL_CREDS, &opt, optlen) != 0)
228 DWARN("%s: setsockopt LOCAL_CREDS failed", __func__);
229
230 (void)sprintf(fdstr, "%d", sv[1]);
231
232 switch(fork()) {
233 case -1:
234 #ifdef PERFUSE_DEBUG
235 DWARN("%s:%d: fork failed", __func__, __LINE__);
236 #endif
237 return -1;
238 /* NOTREACHED */
239 break;
240 case 0:
241 (void)close(sv[0]);
242 (void)execve(argv[0], argv, environ);
243 #ifdef PERFUSE_DEBUG
244 DWARN("%s:%d: execve failed", __func__, __LINE__);
245 #endif
246 return -1;
247 /* NOTREACHED */
248 break;
249 default:
250 break;
251 }
252
253 (void)close(sv[1]);
254 return sv[0];
255 }
256
257 int
258 perfuse_mount(const char *source, const char *target,
259 const char *filesystemtype, long mountflags, const void *data)
260 {
261 int s;
262 size_t len;
263 struct perfuse_mount_out *pmo;
264 struct sockaddr_storage ss;
265 struct sockaddr_un *sun;
266 struct sockaddr *sa;
267 socklen_t sa_len;
268 size_t sock_len;
269 char *frame;
270 char *cp;
271
272 #ifdef PERFUSE_DEBUG
273 if (perfuse_diagflags & PDF_MISC)
274 DPRINTF("%s(\"%s\", \"%s\", \"%s\", 0x%lx, \"%s\")\n",
275 __func__, source, target, filesystemtype,
276 mountflags, (const char *)data);
277 #endif
278
279 if ((s = get_fd(data)) == -1)
280 return -1;
281
282 /*
283 * If we are connected to /dev/fuse, we need a second
284 * socket to get replies from perfused.
285 * XXX This socket is not removed at exit time yet
286 */
287 sock_len = 0;
288 sa = (struct sockaddr *)(void *)&ss;
289 sun = (struct sockaddr_un *)(void *)&ss;
290 sa_len = sizeof(ss);
291 if ((getpeername(s, sa, &sa_len) == 0) &&
292 (sa->sa_family = AF_LOCAL) &&
293 (strcmp(sun->sun_path, _PATH_FUSE) == 0)) {
294
295 sun->sun_len = sizeof(*sun);
296 sun->sun_family = AF_LOCAL;
297 (void)sprintf(sun->sun_path, "%s/%s-%d",
298 _PATH_TMP, getprogname(), getpid());
299
300 if (bind(s, sa, (socklen_t)sa->sa_len) != 0)
301 DERR(EX_OSERR, "%s:%d bind to \"%s\" failed",
302 __func__, __LINE__, sun->sun_path);
303
304 sock_len = strlen(sun->sun_path) + 1;
305 }
306
307 len = sizeof(*pmo);
308 len += source ? (uint32_t)strlen(source) + 1 : 0;
309 len += target ? (uint32_t)strlen(target) + 1 : 0;
310 len += filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
311 len += data ? (uint32_t)strlen(data) + 1 : 0;
312 len += sock_len;
313
314 if ((frame = malloc(len)) == NULL) {
315 #ifdef PERFUSE_DEBUG
316 if (perfuse_diagflags & PDF_MISC)
317 DWARN("%s:%d malloc failed", __func__, __LINE__);
318 #endif
319 return -1;
320 }
321
322 pmo = (struct perfuse_mount_out *)(void *)frame;
323 pmo->pmo_len = (uint32_t)len;
324 pmo->pmo_error = 0;
325 pmo->pmo_unique = (uint64_t)-1;
326 (void)strcpy(pmo->pmo_magic, PERFUSE_MOUNT_MAGIC);
327
328 pmo->pmo_source_len = source ? (uint32_t)strlen(source) + 1 : 0;
329 pmo->pmo_target_len = target ? (uint32_t)strlen(target) + 1: 0;
330 pmo->pmo_filesystemtype_len =
331 filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
332 pmo->pmo_mountflags = (uint32_t)mountflags;
333 pmo->pmo_data_len = data ? (uint32_t)strlen(data) + 1 : 0;
334 pmo->pmo_sock_len = (uint32_t)sock_len;
335
336 cp = (char *)(void *)(pmo + 1);
337
338 if (source) {
339 (void)strcpy(cp, source);
340 cp += pmo->pmo_source_len;
341 }
342
343 if (target) {
344 (void)strcpy(cp, target);
345 cp += pmo->pmo_target_len;
346 }
347
348 if (filesystemtype) {
349 (void)strcpy(cp, filesystemtype);
350 cp += pmo->pmo_filesystemtype_len;
351 }
352
353 if (data) {
354 (void)strcpy(cp, data);
355 cp += pmo->pmo_data_len;
356 }
357
358 if (sock_len != 0) {
359 (void)strcpy(cp, sun->sun_path);
360 cp += pmo->pmo_sock_len;
361 }
362
363 if (send(s, frame, len, MSG_NOSIGNAL) != (ssize_t)len) {
364 #ifdef PERFUSE_DEBUG
365 DWARN("%s:%d sendto failed", __func__, __LINE__);
366 #endif
367 return -1;
368 }
369
370 return 0;
371 }
372
373
374 uint64_t
375 perfuse_next_unique(struct puffs_usermount *pu)
376 {
377 struct perfuse_state *ps;
378
379 ps = puffs_getspecific(pu);
380
381 return ps->ps_unique++;
382 }
383
384 struct puffs_usermount *
385 perfuse_init(struct perfuse_callbacks *pc, struct perfuse_mount_info *pmi)
386 {
387 struct perfuse_state *ps;
388 struct puffs_usermount *pu;
389 struct puffs_ops *pops;
390 const char *source = _PATH_PUFFS;
391 char *fstype;
392 unsigned int puffs_flags;
393 struct puffs_node *pn_root;
394 struct puffs_pathobj *po_root;
395 struct rlimit rl;
396
397 /*
398 * perfused can grow quite large, let assume there's enough ram ...
399 */
400 if (getrlimit(RLIMIT_DATA, &rl) < 0) {
401 DERR(EX_OSERR, "%s: getrlimit failed: %s", __func__,
402 strerror(errno));
403 } else {
404 rl.rlim_cur = rl.rlim_max;
405 if (setrlimit(RLIMIT_DATA, &rl) < 0) {
406 DERR(EX_OSERR, "%s: setrlimit failed: %s", __func__,
407 strerror(errno));
408 }
409 }
410
411
412 ps = init_state();
413 ps->ps_owner_uid = pmi->pmi_uid;
414
415 if (pmi->pmi_source) {
416 if ((ps->ps_source = strdup(pmi->pmi_source)) == NULL)
417 DERR(EX_OSERR, "%s: strdup failed", __func__);
418
419 source = ps->ps_source;
420 }
421
422 if (pmi->pmi_filesystemtype) {
423 size_t len;
424
425 ps->ps_filesystemtype = strdup(pmi->pmi_filesystemtype);
426 if (ps->ps_filesystemtype == NULL)
427 DERR(EX_OSERR, "%s: strdup failed", __func__);
428
429 len = sizeof("perfuse|") + strlen(ps->ps_filesystemtype) + 1;
430 if ((fstype = malloc(len)) == NULL)
431 DERR(EX_OSERR, "%s: malloc failed", __func__);
432
433 (void)sprintf(fstype, "perfuse|%s", ps->ps_filesystemtype);
434 } else {
435 if ((fstype = strdup("perfuse")) == NULL)
436 DERR(EX_OSERR, "%s: strdup failed", __func__);
437 }
438
439 if ((ps->ps_target = strdup(pmi->pmi_target)) == NULL)
440 DERR(EX_OSERR, "%s: strdup failed", __func__);
441
442 ps->ps_mountflags = pmi->pmi_mountflags;
443
444 /*
445 * Some options are forbidden for non root users
446 */
447 if (ps->ps_owner_uid != 0)
448 ps->ps_mountflags |= MNT_NOSUID|MNT_NODEV;
449
450 PUFFSOP_INIT(pops);
451 PUFFSOP_SET(pops, perfuse, fs, unmount);
452 PUFFSOP_SET(pops, perfuse, fs, statvfs);
453 PUFFSOP_SET(pops, perfuse, fs, sync);
454 PUFFSOP_SET(pops, perfuse, node, lookup);
455 PUFFSOP_SET(pops, perfuse, node, create);
456 PUFFSOP_SET(pops, perfuse, node, mknod);
457 PUFFSOP_SET(pops, perfuse, node, open);
458 PUFFSOP_SET(pops, perfuse, node, close);
459 PUFFSOP_SET(pops, perfuse, node, access);
460 PUFFSOP_SET(pops, perfuse, node, getattr);
461 PUFFSOP_SET(pops, perfuse, node, setattr);
462 PUFFSOP_SET(pops, perfuse, node, poll);
463 #if 0
464 PUFFSOP_SET(pops, perfuse, node, mmap);
465 #endif
466 PUFFSOP_SET(pops, perfuse, node, fsync);
467 PUFFSOP_SET(pops, perfuse, node, seek);
468 PUFFSOP_SET(pops, perfuse, node, remove);
469 PUFFSOP_SET(pops, perfuse, node, link);
470 PUFFSOP_SET(pops, perfuse, node, rename);
471 PUFFSOP_SET(pops, perfuse, node, mkdir);
472 PUFFSOP_SET(pops, perfuse, node, rmdir);
473 PUFFSOP_SET(pops, perfuse, node, symlink);
474 PUFFSOP_SET(pops, perfuse, node, readdir);
475 PUFFSOP_SET(pops, perfuse, node, readlink);
476 PUFFSOP_SET(pops, perfuse, node, reclaim);
477 PUFFSOP_SET(pops, perfuse, node, inactive);
478 PUFFSOP_SET(pops, perfuse, node, print);
479 PUFFSOP_SET(pops, perfuse, node, advlock);
480 PUFFSOP_SET(pops, perfuse, node, read);
481 PUFFSOP_SET(pops, perfuse, node, write);
482 #ifdef PUFFS_EXTNAMELEN
483 PUFFSOP_SET(pops, perfuse, node, getextattr);
484 PUFFSOP_SET(pops, perfuse, node, setextattr);
485 PUFFSOP_SET(pops, perfuse, node, listextattr);
486 PUFFSOP_SET(pops, perfuse, node, deleteextattr);
487 #endif /* PUFFS_EXTNAMELEN */
488
489 /*
490 * We used to have PUFFS_KFLAG_WTCACHE here, which uses the
491 * page cache (highly desirable to get mmap(2)), but still sends
492 * all writes to the filesystem. In fact it does not send the
493 * data written, but the pages that contain it.
494 *
495 * There is a nasty bug hidden somewhere, possibly in libpuffs'
496 * VOP_FSYNC, which sends an asynchronous PUFFS_SETATTR that
497 * update file size. When writes are in progress, it will cause
498 * the file to be truncated and we get a zero-filled chunk at the
499 * beginning of a page. Removing PUFFS_KFLAG_WTCACHE fixes that
500 * problem.
501 *
502 * The other consequences are that changes will not be propagated
503 * immediatly to the filesystem, and we get a huge performance gain
504 * because much less requests are sent. A test case for the above
505 * mentioned bug got its execution time slashed by factor 50.
506 *
507 * PUFFS_KFLAG_NOCACHE_NAME is required so that we can see changes
508 * done by other machines in networked filesystems.
509 */
510 puffs_flags = PUFFS_KFLAG_NOCACHE_NAME;
511
512 if (perfuse_diagflags & PDF_PUFFS)
513 puffs_flags |= PUFFS_FLAG_OPDUMP;
514
515 if ((pu = puffs_init(pops, source, fstype, ps, puffs_flags)) == NULL)
516 DERR(EX_OSERR, "%s: puffs_init failed", __func__);
517
518 ps->ps_pu = pu;
519
520 /*
521 * Setup filesystem root
522 */
523 pn_root = perfuse_new_pn(pu, "", NULL);
524 PERFUSE_NODE_DATA(pn_root)->pnd_nodeid = FUSE_ROOT_ID;
525 PERFUSE_NODE_DATA(pn_root)->pnd_parent = pn_root;
526 puffs_setroot(pu, pn_root);
527 ps->ps_fsid = pn_root->pn_va.va_fsid;
528
529 po_root = puffs_getrootpathobj(pu);
530 if ((po_root->po_path = strdup("/")) == NULL)
531 DERRX(EX_OSERR, "perfuse_mount_start() failed");
532
533 po_root->po_len = 1;
534 puffs_path_buildhash(pu, po_root);
535
536 puffs_vattr_null(&pn_root->pn_va);
537 pn_root->pn_va.va_type = VDIR;
538 pn_root->pn_va.va_mode = 0755;
539 pn_root->pn_va.va_fileid = FUSE_ROOT_ID;
540
541 ps->ps_root = pn_root;
542
543 /*
544 * Callbacks
545 */
546 ps->ps_new_msg = pc->pc_new_msg;
547 ps->ps_xchg_msg = pc->pc_xchg_msg;
548 ps->ps_destroy_msg = pc->pc_destroy_msg;
549 ps->ps_get_inhdr = pc->pc_get_inhdr;
550 ps->ps_get_inpayload = pc->pc_get_inpayload;
551 ps->ps_get_outhdr = pc->pc_get_outhdr;
552 ps->ps_get_outpayload = pc->pc_get_outpayload;
553 ps->ps_umount = pc->pc_umount;
554
555 return pu;
556 }
557
558 void
559 perfuse_setspecific(struct puffs_usermount *pu, void *priv)
560 {
561 struct perfuse_state *ps;
562
563 ps = puffs_getspecific(pu);
564 ps->ps_private = priv;
565
566 return;
567 }
568
569 void *
570 perfuse_getspecific(struct puffs_usermount *pu)
571 {
572 struct perfuse_state *ps;
573
574 ps = puffs_getspecific(pu);
575
576 return ps->ps_private;
577 }
578
579 int
580 perfuse_inloop(struct puffs_usermount *pu)
581 {
582 struct perfuse_state *ps;
583
584 ps = puffs_getspecific(pu);
585
586 return ps->ps_flags & PS_INLOOP;
587 }
588
589 int
590 perfuse_mainloop(struct puffs_usermount *pu)
591 {
592 struct perfuse_state *ps;
593
594 ps = puffs_getspecific(pu);
595
596 ps->ps_flags |= PS_INLOOP;
597 if (puffs_mainloop(ps->ps_pu) != 0) {
598 DERR(EX_OSERR, "%s: failed", __func__);
599 return -1;
600 }
601
602 /*
603 * Normal exit after unmount
604 */
605 return 0;
606 }
607
608 /* ARGSUSED0 */
609 uint64_t
610 perfuse_get_nodeid(struct puffs_usermount *pu, puffs_cookie_t opc)
611 {
612 return PERFUSE_NODE_DATA(opc)->pnd_nodeid;
613 }
614
615 int
616 perfuse_unmount(struct puffs_usermount *pu)
617 {
618 struct perfuse_state *ps;
619
620 ps = puffs_getspecific(pu);
621
622 return unmount(ps->ps_target, MNT_FORCE);
623 }
624