perfuse.c revision 1.20 1 /* $NetBSD: perfuse.c,v 1.20 2011/09/09 22:51:44 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <errno.h>
34 #include <puffs.h>
35 #include <sys/types.h>
36 #include <sys/mman.h>
37 #include <sys/socket.h>
38 #include <sys/extattr.h>
39 #include <sys/un.h>
40 #include <machine/vmparam.h>
41
42 #define LIBPERFUSE
43 #include "perfuse.h"
44 #include "perfuse_if.h"
45 #include "perfuse_priv.h"
46
/* Forward declarations of the file-local helpers defined below. */
static struct perfuse_state *init_state(void);
static int get_fd(const char *);

/* Process environment; handed unchanged to execve(2) in perfuse_open(). */
extern char **environ;

/* Diagnostic flag bits (PDF_*) tested by this file and the D* macros. */
int perfuse_diagflags = 0;
52
53
54 static struct perfuse_state *
55 init_state(void)
56 {
57 struct perfuse_state *ps;
58 char opts[1024];
59
60 if ((ps = malloc(sizeof(*ps))) == NULL)
61 DERR(EX_OSERR, "%s: malloc failed", __func__);
62
63 (void)memset(ps, 0, sizeof(*ps));
64 ps->ps_max_write = UINT_MAX;
65 ps->ps_max_readahead = UINT_MAX;
66
67 /*
68 * Most of the time, access() is broken because the filesystem
69 * performs the check with root privileges. glusterfs will do that
70 * if the Linux-specific setfsuid() is missing, for instance.
71 */
72 ps->ps_flags |= PS_NO_ACCESS;
73
74 /*
75 * This is a temporary way to toggle access and creat usage.
76 * It would be nice if that could be provided as mount options,
77 * but that will not be obvious to do.
78 */
79 if (getenv_r("PERFUSE_OPTIONS", opts, sizeof(opts)) != -1) {
80 char *optname;
81 char *last;
82
83 for ((optname = strtok_r(opts, ",", &last));
84 optname != NULL;
85 (optname = strtok_r(NULL, ",", &last))) {
86 if (strcmp(optname, "enable_access") == 0)
87 ps->ps_flags &= ~PS_NO_ACCESS;
88
89 if (strcmp(optname, "disable_access") == 0)
90 ps->ps_flags |= PS_NO_ACCESS;
91
92 if (strcmp(optname, "enable_creat") == 0)
93 ps->ps_flags &= ~PS_NO_CREAT;
94
95 if (strcmp(optname, "disable_creat") == 0)
96 ps->ps_flags |= PS_NO_CREAT;
97 }
98 }
99
100
101 return ps;
102 }
103
104
/*
 * get_fd --
 *	Extract the file descriptor number from a comma-separated mount
 *	option string such as "rw,fd=5,rootmode=40000".
 *
 *	data	NUL-terminated option string; may be NULL.
 *
 *	The first option that starts with "fd=" decides the result.  Its
 *	value must be a non-empty run of decimal digits that fits in an int
 *	and is followed directly by ',' or the end of the string.
 *
 *	Returns the descriptor number (>= 0) on success.  Returns -1 with
 *	errno set to EINVAL when data is NULL, when no "fd=" option exists,
 *	or when its value is not a valid non-negative int.
 *
 *	The string is scanned in place, so no memory is allocated and the
 *	function cannot fail for lack of memory.
 */
static int
get_fd(const char *data)
{
	static const char fdopt[] = "fd=";
	const size_t fdopt_len = sizeof(fdopt) - 1;
	const char *opt;
	const char *p;
	int fd;

	if (data == NULL)
		goto invalid;

	opt = data;
	for (;;) {
		/* Skip separators, which also skips empty options. */
		opt += strspn(opt, ",");
		if (*opt == '\0')
			goto invalid;	/* no "fd=" option found */

		/* Only match at the start of an option, not inside one. */
		if (strncmp(opt, fdopt, fdopt_len) == 0)
			break;

		opt += strcspn(opt, ",");
	}

	p = opt + fdopt_len;
	if (*p < '0' || *p > '9')
		goto invalid;		/* empty, signed or non-numeric */

	fd = 0;
	for (; *p >= '0' && *p <= '9'; p++) {
		int digit = *p - '0';

		/* Refuse values that would overflow an int. */
		if (fd > (INT_MAX - digit) / 10)
			goto invalid;
		fd = fd * 10 + digit;
	}

	/* Trailing garbage such as "fd=5x" is not a descriptor. */
	if (*p != '\0' && *p != ',')
		goto invalid;

	return fd;

invalid:
	errno = EINVAL;
	return -1;
}
137
138 int
139 perfuse_open(path, flags, mode)
140 const char *path;
141 int flags;
142 mode_t mode;
143 {
144 int sv[2];
145 struct sockaddr_un sun;
146 struct sockaddr *sa;
147 char progname[] = _PATH_PERFUSED;
148 char minus_i[] = "-i";
149 char fdstr[16];
150 char *const argv[] = { progname, minus_i, fdstr, NULL};
151 uint32_t opt;
152 uint32_t optlen;
153 int sock_type = SOCK_SEQPACKET;
154
155 if (strcmp(path, _PATH_FUSE) != 0)
156 return open(path, flags, mode);
157
158 /*
159 * Try SOCK_SEQPACKET then SOCK_DGRAM if unavailable
160 */
161 if ((sv[0] = socket(PF_LOCAL, SOCK_SEQPACKET, 0)) == -1) {
162 sock_type = SOCK_DGRAM;
163 DWARNX("SEQPACKET local sockets unavailable, using less "
164 "reliable DGRAM sockets. Expect file operation hangs.");
165
166 if ((sv[0] = socket(PF_LOCAL, SOCK_DGRAM, 0)) == -1) {
167 #ifdef PERFUSE_DEBUG
168 DWARN("%s: %d socket failed", __func__, __LINE__);
169 #endif
170 return -1;
171 }
172 }
173
174 /*
175 * Set a buffer lentgh large enough so that any FUSE packet
176 * will fit.
177 */
178 opt = (uint32_t)FUSE_BUFSIZE;
179 optlen = sizeof(opt);
180 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
181 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
182
183 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
184 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
185
186 sa = (struct sockaddr *)(void *)&sun;
187 sun.sun_len = sizeof(sun);
188 sun.sun_family = AF_LOCAL;
189 (void)strcpy(sun.sun_path, path);
190
191 if (connect(sv[0], sa, (socklen_t)sun.sun_len) == 0)
192 return sv[0];
193
194 /*
195 * Attempt to run perfused on our own
196 * if it does not run yet; In that case
197 * we will talk using a socketpair
198 * instead of /dev/fuse.
199 */
200 if (socketpair(PF_LOCAL, sock_type, 0, sv) != 0) {
201 DWARN("%s:%d: socketpair failed", __func__, __LINE__);
202 return -1;
203 }
204
205 /*
206 * Set a buffer lentgh large enough so that any FUSE packet
207 * will fit.
208 */
209 opt = (uint32_t)(4 * FUSE_BUFSIZE);
210 optlen = sizeof(opt);
211 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
212 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
213
214 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
215 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
216
217 if (setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
218 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);
219
220 if (setsockopt(sv[1], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
221 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);
222
223 /*
224 * Request peer credentials. This musr be done before first
225 * frame is sent.
226 */
227 opt = 1;
228 optlen = sizeof(opt);
229 if (setsockopt(sv[1], 0, LOCAL_CREDS, &opt, optlen) != 0)
230 DWARN("%s: setsockopt LOCAL_CREDS failed", __func__);
231
232 (void)sprintf(fdstr, "%d", sv[1]);
233
234 switch(fork()) {
235 case -1:
236 #ifdef PERFUSE_DEBUG
237 DWARN("%s:%d: fork failed", __func__, __LINE__);
238 #endif
239 return -1;
240 /* NOTREACHED */
241 break;
242 case 0:
243 (void)execve(argv[0], argv, environ);
244 #ifdef PERFUSE_DEBUG
245 DWARN("%s:%d: execve failed", __func__, __LINE__);
246 #endif
247 return -1;
248 /* NOTREACHED */
249 break;
250 default:
251 break;
252 }
253
254 return sv[0];
255 }
256
257 int
258 perfuse_mount(source, target, filesystemtype, mountflags, data)
259 const char *source;
260 const char *target;
261 const char *filesystemtype;
262 long mountflags;
263 const void *data;
264 {
265 int s;
266 size_t len;
267 struct perfuse_mount_out *pmo;
268 struct sockaddr_storage ss;
269 struct sockaddr_un *sun;
270 struct sockaddr *sa;
271 socklen_t sa_len;
272 size_t sock_len;
273 char *frame;
274 char *cp;
275
276 #ifdef PERFUSE_DEBUG
277 if (perfuse_diagflags & PDF_MISC)
278 DPRINTF("%s(\"%s\", \"%s\", \"%s\", 0x%lx, \"%s\")\n",
279 __func__, source, target, filesystemtype,
280 mountflags, (const char *)data);
281 #endif
282
283 if ((s = get_fd(data)) == -1)
284 return -1;
285
286 /*
287 * If we are connected to /dev/fuse, we need a second
288 * socket to get replies from perfused.
289 * XXX This socket is not removed at exit time yet
290 */
291 sock_len = 0;
292 sa = (struct sockaddr *)(void *)&ss;
293 sun = (struct sockaddr_un *)(void *)&ss;
294 sa_len = sizeof(ss);
295 if ((getpeername(s, sa, &sa_len) == 0) &&
296 (sa->sa_family = AF_LOCAL) &&
297 (strcmp(sun->sun_path, _PATH_FUSE) == 0)) {
298
299 sun->sun_len = sizeof(*sun);
300 sun->sun_family = AF_LOCAL;
301 (void)sprintf(sun->sun_path, "%s/%s-%d",
302 _PATH_TMP, getprogname(), getpid());
303
304 if (bind(s, sa, (socklen_t)sa->sa_len) != 0)
305 DERR(EX_OSERR, "%s:%d bind to \"%s\" failed",
306 __func__, __LINE__, sun->sun_path);
307
308 sock_len = strlen(sun->sun_path) + 1;
309 }
310
311 len = sizeof(*pmo);
312 len += source ? (uint32_t)strlen(source) + 1 : 0;
313 len += target ? (uint32_t)strlen(target) + 1 : 0;
314 len += filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
315 len += data ? (uint32_t)strlen(data) + 1 : 0;
316 len += sock_len;
317
318 if ((frame = malloc(len)) == NULL) {
319 #ifdef PERFUSE_DEBUG
320 if (perfuse_diagflags & PDF_MISC)
321 DWARN("%s:%d malloc failed", __func__, __LINE__);
322 #endif
323 return -1;
324 }
325
326 pmo = (struct perfuse_mount_out *)(void *)frame;
327 pmo->pmo_len = (uint32_t)len;
328 pmo->pmo_error = 0;
329 pmo->pmo_unique = (uint64_t)-1;
330 (void)strcpy(pmo->pmo_magic, PERFUSE_MOUNT_MAGIC);
331
332 pmo->pmo_source_len = source ? (uint32_t)strlen(source) + 1 : 0;
333 pmo->pmo_target_len = target ? (uint32_t)strlen(target) + 1: 0;
334 pmo->pmo_filesystemtype_len =
335 filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
336 pmo->pmo_mountflags = (uint32_t)mountflags;
337 pmo->pmo_data_len = data ? (uint32_t)strlen(data) + 1 : 0;
338 pmo->pmo_sock_len = (uint32_t)sock_len;
339
340 cp = (char *)(void *)(pmo + 1);
341
342 if (source) {
343 (void)strcpy(cp, source);
344 cp += pmo->pmo_source_len;
345 }
346
347 if (target) {
348 (void)strcpy(cp, target);
349 cp += pmo->pmo_target_len;
350 }
351
352 if (filesystemtype) {
353 (void)strcpy(cp, filesystemtype);
354 cp += pmo->pmo_filesystemtype_len;
355 }
356
357 if (data) {
358 (void)strcpy(cp, data);
359 cp += pmo->pmo_data_len;
360 }
361
362 if (sock_len != 0) {
363 (void)strcpy(cp, sun->sun_path);
364 cp += pmo->pmo_sock_len;
365 }
366
367 if (send(s, frame, len, MSG_NOSIGNAL) != (ssize_t)len) {
368 #ifdef PERFUSE_DEBUG
369 DWARN("%s:%d sendto failed", __func__, __LINE__);
370 #endif
371 return -1;
372 }
373
374 return 0;
375 }
376
377
378 uint64_t
379 perfuse_next_unique(pu)
380 struct puffs_usermount *pu;
381 {
382 struct perfuse_state *ps;
383
384 ps = puffs_getspecific(pu);
385
386 return ps->ps_unique++;
387 }
388
389 struct puffs_usermount *
390 perfuse_init(pc, pmi)
391 struct perfuse_callbacks *pc;
392 struct perfuse_mount_info *pmi;
393 {
394 struct perfuse_state *ps;
395 struct puffs_usermount *pu;
396 struct puffs_ops *pops;
397 const char *source = _PATH_PUFFS;
398 char *fstype;
399 unsigned int puffs_flags;
400 struct puffs_node *pn_root;
401 struct puffs_pathobj *po_root;
402
403 /*
404 * perfused needs to remain in memory. If it gets
405 * swapped out, the kernel will deadlock when trying
406 * to free memory backed by the PUFFS filesystem
407 */
408 mlockall(MCL_CURRENT|MCL_FUTURE);
409
410 ps = init_state();
411 ps->ps_owner_uid = pmi->pmi_uid;
412
413 if (pmi->pmi_source) {
414 if ((ps->ps_source = strdup(pmi->pmi_source)) == NULL)
415 DERR(EX_OSERR, "%s: strdup failed", __func__);
416
417 source = ps->ps_source;
418 }
419
420 if (pmi->pmi_filesystemtype) {
421 size_t len;
422
423 ps->ps_filesystemtype = strdup(pmi->pmi_filesystemtype);
424 if (ps->ps_filesystemtype == NULL)
425 DERR(EX_OSERR, "%s: strdup failed", __func__);
426
427 len = sizeof("perfuse|") + strlen(ps->ps_filesystemtype) + 1;
428 if ((fstype = malloc(len)) == NULL)
429 DERR(EX_OSERR, "%s: malloc failed", __func__);
430
431 (void)sprintf(fstype, "perfuse|%s", ps->ps_filesystemtype);
432 } else {
433 if ((fstype = strdup("perfuse")) == NULL)
434 DERR(EX_OSERR, "%s: strdup failed", __func__);
435 }
436
437 if ((ps->ps_target = strdup(pmi->pmi_target)) == NULL)
438 DERR(EX_OSERR, "%s: strdup failed", __func__);
439
440 ps->ps_mountflags = pmi->pmi_mountflags;
441
442 /*
443 * Some options are forbidden for non root users
444 */
445 if (ps->ps_owner_uid != 0)
446 ps->ps_mountflags |= MNT_NOSUID|MNT_NODEV;
447
448 PUFFSOP_INIT(pops);
449 PUFFSOP_SET(pops, perfuse, fs, unmount);
450 PUFFSOP_SET(pops, perfuse, fs, statvfs);
451 PUFFSOP_SET(pops, perfuse, fs, sync);
452 PUFFSOP_SET(pops, perfuse, node, lookup);
453 PUFFSOP_SET(pops, perfuse, node, create);
454 PUFFSOP_SET(pops, perfuse, node, mknod);
455 PUFFSOP_SET(pops, perfuse, node, open);
456 PUFFSOP_SET(pops, perfuse, node, close);
457 PUFFSOP_SET(pops, perfuse, node, access);
458 PUFFSOP_SET(pops, perfuse, node, getattr);
459 PUFFSOP_SET(pops, perfuse, node, setattr);
460 PUFFSOP_SET(pops, perfuse, node, poll);
461 #if 0
462 PUFFSOP_SET(pops, perfuse, node, mmap);
463 #endif
464 PUFFSOP_SET(pops, perfuse, node, fsync);
465 PUFFSOP_SET(pops, perfuse, node, seek);
466 PUFFSOP_SET(pops, perfuse, node, remove);
467 PUFFSOP_SET(pops, perfuse, node, link);
468 PUFFSOP_SET(pops, perfuse, node, rename);
469 PUFFSOP_SET(pops, perfuse, node, mkdir);
470 PUFFSOP_SET(pops, perfuse, node, rmdir);
471 PUFFSOP_SET(pops, perfuse, node, symlink);
472 PUFFSOP_SET(pops, perfuse, node, readdir);
473 PUFFSOP_SET(pops, perfuse, node, readlink);
474 PUFFSOP_SET(pops, perfuse, node, reclaim);
475 PUFFSOP_SET(pops, perfuse, node, inactive);
476 PUFFSOP_SET(pops, perfuse, node, print);
477 PUFFSOP_SET(pops, perfuse, node, advlock);
478 PUFFSOP_SET(pops, perfuse, node, read);
479 PUFFSOP_SET(pops, perfuse, node, write);
480 #ifdef PUFFS_EXTNAMELEN
481 PUFFSOP_SET(pops, perfuse, node, getextattr);
482 PUFFSOP_SET(pops, perfuse, node, setextattr);
483 PUFFSOP_SET(pops, perfuse, node, listextattr);
484 PUFFSOP_SET(pops, perfuse, node, deleteextattr);
485 #endif /* PUFFS_EXTNAMELEN */
486
487 /*
488 * We used to have PUFFS_KFLAG_WTCACHE here, which uses the
489 * page cache (highly desirable to get mmap(2)), but still sends
490 * all writes to the filesystem. In fact it does not send the
491 * data written, but the pages that contain it.
492 *
493 * There is a nasty bug hidden somewhere, possibly in libpuffs'
494 * VOP_FSYNC, which sends an asynchronous PUFFS_SETATTR that
495 * update file size. When writes are in progress, it will cause
496 * the file to be truncated and we get a zero-filled chunk at the
497 * beginning of a page. Removing PUFFS_KFLAG_WTCACHE fixes that
498 * problem.
499 *
500 * The other consequences are that changes will not be propagated
501 * immediatly to the filesystem, and we get a huge performance gain
502 * because much less requests are sent. A test case for the above
503 * mentioned bug got its execution time slashed by factor 50.
504 */
505 puffs_flags = 0;
506
507 if (perfuse_diagflags & PDF_PUFFS)
508 puffs_flags |= PUFFS_FLAG_OPDUMP;
509
510 if ((pu = puffs_init(pops, source, fstype, ps, puffs_flags)) == NULL)
511 DERR(EX_OSERR, "%s: puffs_init failed", __func__);
512
513 ps->ps_pu = pu;
514
515 /*
516 * Setup filesystem root
517 */
518 pn_root = perfuse_new_pn(pu, "", NULL);
519 PERFUSE_NODE_DATA(pn_root)->pnd_ino = FUSE_ROOT_ID;
520 PERFUSE_NODE_DATA(pn_root)->pnd_parent = pn_root;
521 puffs_setroot(pu, pn_root);
522 ps->ps_fsid = pn_root->pn_va.va_fsid;
523
524 po_root = puffs_getrootpathobj(pu);
525 if ((po_root->po_path = strdup("/")) == NULL)
526 DERRX(EX_OSERR, "perfuse_mount_start() failed");
527
528 po_root->po_len = 1;
529 puffs_path_buildhash(pu, po_root);
530
531 puffs_vattr_null(&pn_root->pn_va);
532 pn_root->pn_va.va_type = VDIR;
533 pn_root->pn_va.va_mode = 0755;
534
535 ps->ps_root = pn_root;
536
537 /*
538 * Callbacks
539 */
540 ps->ps_new_msg = pc->pc_new_msg;
541 ps->ps_xchg_msg = pc->pc_xchg_msg;
542 ps->ps_destroy_msg = pc->pc_destroy_msg;
543 ps->ps_get_inhdr = pc->pc_get_inhdr;
544 ps->ps_get_inpayload = pc->pc_get_inpayload;
545 ps->ps_get_outhdr = pc->pc_get_outhdr;
546 ps->ps_get_outpayload = pc->pc_get_outpayload;
547 ps->ps_umount = pc->pc_umount;
548
549 return pu;
550 }
551
552 void
553 perfuse_setspecific(pu, priv)
554 struct puffs_usermount *pu;
555 void *priv;
556 {
557 struct perfuse_state *ps;
558
559 ps = puffs_getspecific(pu);
560 ps->ps_private = priv;
561
562 return;
563 }
564
565 void *
566 perfuse_getspecific(pu)
567 struct puffs_usermount *pu;
568 {
569 struct perfuse_state *ps;
570
571 ps = puffs_getspecific(pu);
572
573 return ps->ps_private;
574 }
575
576 int
577 perfuse_inloop(pu)
578 struct puffs_usermount *pu;
579 {
580 struct perfuse_state *ps;
581
582 ps = puffs_getspecific(pu);
583
584 return ps->ps_flags & PS_INLOOP;
585 }
586
587 int
588 perfuse_mainloop(pu)
589 struct puffs_usermount *pu;
590 {
591 struct perfuse_state *ps;
592
593 ps = puffs_getspecific(pu);
594
595 ps->ps_flags |= PS_INLOOP;
596 if (puffs_mainloop(ps->ps_pu) != 0) {
597 DERR(EX_OSERR, "%s: failed", __func__);
598 return -1;
599 }
600
601 /*
602 * Normal exit after unmount
603 */
604 return 0;
605 }
606
607 /* ARGSUSED0 */
608 uint64_t
609 perfuse_get_ino(pu, opc)
610 struct puffs_usermount *pu;
611 puffs_cookie_t opc;
612 {
613 return PERFUSE_NODE_DATA(opc)->pnd_ino;
614 }
615
616 int
617 perfuse_unmount(pu)
618 struct puffs_usermount *pu;
619 {
620 struct perfuse_state *ps;
621
622 ps = puffs_getspecific(pu);
623
624 return unmount(ps->ps_target, MNT_FORCE);
625 }
626