/*	$NetBSD: rumpfs.c,v 1.62 2010/07/13 18:08:58 pooka Exp $	*/

/*
 * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.62 2010/07/13 18:08:58 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/dirent.h>
#include <sys/errno.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/kauth.h>
#include <sys/kmem.h>	/* kmem_alloc() & friends; may already be pulled in indirectly */
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/syscallargs.h>
#include <sys/vnode.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>

#include <rump/rumpuser.h>

#include "rump_private.h"
#include "rump_vfs_private.h"

static int rump_vop_lookup(void *);
static int rump_vop_getattr(void *);
static int rump_vop_mkdir(void *);
static int rump_vop_rmdir(void *);
static int rump_vop_mknod(void *);
static int rump_vop_create(void *);
static int rump_vop_inactive(void *);
static int rump_vop_reclaim(void *);
static int rump_vop_success(void *);
static int rump_vop_readdir(void *);
static int rump_vop_spec(void *);
static int rump_vop_read(void *);
static int rump_vop_write(void *);
static int rump_vop_open(void *);
static int rump_vop_symlink(void *);
static int rump_vop_readlink(void *);
static int rump_vop_whiteout(void *);

int (**fifo_vnodeop_p)(void *);
const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ NULL, NULL }
};
const struct vnodeopv_desc fifo_vnodeop_opv_desc =
	{ &fifo_vnodeop_p, fifo_vnodeop_entries };

int (**rump_vnodeop_p)(void *);
const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, rump_vop_lookup },
	{ &vop_getattr_desc, rump_vop_getattr },
	{ &vop_mkdir_desc, rump_vop_mkdir },
	{ &vop_rmdir_desc, rump_vop_rmdir },
	{ &vop_mknod_desc, rump_vop_mknod },
	{ &vop_create_desc, rump_vop_create },
	{ &vop_symlink_desc, rump_vop_symlink },
	{ &vop_readlink_desc, rump_vop_readlink },
	{ &vop_access_desc, rump_vop_success },
	{ &vop_readdir_desc, rump_vop_readdir },
	{ &vop_read_desc, rump_vop_read },
	{ &vop_write_desc, rump_vop_write },
	{ &vop_open_desc, rump_vop_open },
	{ &vop_seek_desc, genfs_seek },
	{ &vop_putpages_desc, genfs_null_putpages },
	{ &vop_whiteout_desc, rump_vop_whiteout },
	{ &vop_fsync_desc, rump_vop_success },
	{ &vop_lock_desc, genfs_lock },
	{ &vop_unlock_desc, genfs_unlock },
	{ &vop_islocked_desc, genfs_islocked },
	{ &vop_inactive_desc, rump_vop_inactive },
	{ &vop_reclaim_desc, rump_vop_reclaim },
	{ NULL, NULL }
};
const struct vnodeopv_desc rump_vnodeop_opv_desc =
	{ &rump_vnodeop_p, rump_vnodeop_entries };

int (**rump_specop_p)(void *);
const struct vnodeopv_entry_desc rump_specop_entries[] = {
	{ &vop_default_desc, rump_vop_spec },
	{ NULL, NULL }
};
const struct vnodeopv_desc rump_specop_opv_desc =
	{ &rump_specop_p, rump_specop_entries };

const struct vnodeopv_desc * const rump_opv_descs[] = {
	&rump_vnodeop_opv_desc,
	&rump_specop_opv_desc,
	NULL
};

#define RUMPFS_WHITEOUT NULL
#define RDENT_ISWHITEOUT(rdp) (rdp->rd_node == RUMPFS_WHITEOUT)
struct rumpfs_dent {
	char *rd_name;
	int rd_namelen;
	struct rumpfs_node *rd_node;

	LIST_ENTRY(rumpfs_dent) rd_entries;
};

struct rumpfs_node {
	struct vattr rn_va;
	struct vnode *rn_vp;
	char *rn_hostpath;
	int rn_flags;

	union {
		struct {		/* VREG */
			int readfd;
			int writefd;
			uint64_t offset;
		} reg;
		struct {		/* VDIR */
			LIST_HEAD(, rumpfs_dent) dents;
			struct rumpfs_node *parent;
			int flags;
		} dir;
		struct {
			char *target;
			size_t len;
		} link;
	} rn_u;
};
#define rn_readfd	rn_u.reg.readfd
#define rn_writefd	rn_u.reg.writefd
#define rn_offset	rn_u.reg.offset
#define rn_dir		rn_u.dir.dents
#define rn_parent	rn_u.dir.parent
#define rn_linktarg	rn_u.link.target
#define rn_linklen	rn_u.link.len

#define RUMPNODE_CANRECLAIM	0x01
#define RUMPNODE_DIR_ET		0x02
#define RUMPNODE_DIR_ETSUBS	0x04

struct rumpfs_mount {
	struct vnode *rfsmp_rvp;
};

static struct rumpfs_node *makeprivate(enum vtype, dev_t, off_t);

/*
 * Extra Terrestrial stuff.  We map a given key (pathname) to a file on
 * the host FS.  ET phones home only from the root node of rumpfs.
 *
 * When an etfs node is removed, a vnode potentially behind it is not
 * immediately recycled.
 */

struct etfs {
	char et_key[MAXPATHLEN];
	size_t et_keylen;
	bool et_prefixkey;
	bool et_removing;
	devminor_t et_blkmin;

	LIST_ENTRY(etfs) et_entries;

	struct rumpfs_node *et_rn;
};
static kmutex_t etfs_lock;
static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);

static enum vtype
ettype_to_vtype(enum rump_etfs_type et)
{
	enum vtype vt;

	switch (et) {
	case RUMP_ETFS_REG:
		vt = VREG;
		break;
	case RUMP_ETFS_BLK:
		vt = VBLK;
		break;
	case RUMP_ETFS_CHR:
		vt = VCHR;
		break;
	case RUMP_ETFS_DIR:
		vt = VDIR;
		break;
	case RUMP_ETFS_DIR_SUBDIRS:
		vt = VDIR;
		break;
	default:
		panic("invalid et type: %d", et);
	}

	return vt;
}

static enum vtype
hft_to_vtype(int hft)
{
	enum vtype vt;

	switch (hft) {
	case RUMPUSER_FT_OTHER:
		vt = VNON;
		break;
	case RUMPUSER_FT_DIR:
		vt = VDIR;
		break;
	case RUMPUSER_FT_REG:
		vt = VREG;
		break;
	case RUMPUSER_FT_BLK:
		vt = VBLK;
		break;
	case RUMPUSER_FT_CHR:
		vt = VCHR;
		break;
	default:
		vt = VNON;
		break;
	}

	return vt;
}

static bool
etfs_find(const char *key, struct etfs **etp, bool forceprefix)
{
	struct etfs *et;
	size_t keylen = strlen(key);

	KASSERT(mutex_owned(&etfs_lock));

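	/* a key matches either exactly or, for prefix entries, as a leading path prefix */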
	LIST_FOREACH(et, &etfs_list, et_entries) {
		if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
		    && strncmp(key, et->et_key, et->et_keylen) == 0) {
			if (etp)
				*etp = et;
			return true;
		}
	}

	return false;
}

#define REGDIR(ftype) \
    ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
static int
doregister(const char *key, const char *hostpath,
	enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
{
	struct etfs *et;
	struct rumpfs_node *rn;
	uint64_t fsize;
	dev_t rdev = NODEV;
	devminor_t dmin = -1;
	int hft, error;

	if (rumpuser_getfileinfo(hostpath, &fsize, &hft, &error))
		return error;

	/* etfs directory requires a directory on the host */
	if (REGDIR(ftype)) {
		if (hft != RUMPUSER_FT_DIR)
			return ENOTDIR;
		if (begin != 0)
			return EISDIR;
		if (size != RUMP_ETFS_SIZE_ENDOFF)
			return EISDIR;
		size = fsize;
	} else {
		if (begin > fsize)
			return EINVAL;
		if (size == RUMP_ETFS_SIZE_ENDOFF)
			size = fsize - begin;
		if (begin + size > fsize)
			return EINVAL;
	}

	if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
		error = rumpblk_register(hostpath, &dmin, begin, size);
		if (error != 0) {
			return error;
		}
		rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
	}

	et = kmem_alloc(sizeof(*et), KM_SLEEP);
	strcpy(et->et_key, key);
	et->et_keylen = strlen(et->et_key);
	et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), rdev, size);
	et->et_removing = false;
	et->et_blkmin = dmin;

	if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
		size_t len = strlen(hostpath)+1;

		rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
		memcpy(rn->rn_hostpath, hostpath, len);
		rn->rn_offset = begin;
	}

	if (REGDIR(ftype)) {
		rn->rn_flags |= RUMPNODE_DIR_ET;
		et->et_prefixkey = true;
	} else {
		et->et_prefixkey = false;
	}

	if (ftype == RUMP_ETFS_DIR_SUBDIRS)
		rn->rn_flags |= RUMPNODE_DIR_ETSUBS;

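	/*
	 * Insert under the lock, but only if the key is not already
	 * registered.  On a collision, undo the rumpblk registration
	 * and the allocations made above.
	 */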
	mutex_enter(&etfs_lock);
	if (etfs_find(key, NULL, REGDIR(ftype))) {
		mutex_exit(&etfs_lock);
		if (et->et_blkmin != -1)
			rumpblk_deregister(hostpath);
		if (et->et_rn->rn_hostpath != NULL)
			free(et->et_rn->rn_hostpath, M_TEMP);
		kmem_free(et->et_rn, sizeof(*et->et_rn));
		kmem_free(et, sizeof(*et));
		return EEXIST;
	}
	LIST_INSERT_HEAD(&etfs_list, et, et_entries);
	mutex_exit(&etfs_lock);

	return 0;
}
#undef REGDIR

int
rump_etfs_register(const char *key, const char *hostpath,
	enum rump_etfs_type ftype)
{

	return doregister(key, hostpath, ftype, 0, RUMP_ETFS_SIZE_ENDOFF);
}

int
rump_etfs_register_withsize(const char *key, const char *hostpath,
	enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
{

	return doregister(key, hostpath, ftype, begin, size);
}

/* remove etfs mapping.  caller's responsibility to make sure it's not in use */
int
rump_etfs_remove(const char *key)
{
	struct etfs *et;
	size_t keylen = strlen(key);
	int rv;

	mutex_enter(&etfs_lock);
	LIST_FOREACH(et, &etfs_list, et_entries) {
		if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
			if (et->et_removing)
				et = NULL;
			else
				et->et_removing = true;
			break;
		}
	}
	mutex_exit(&etfs_lock);
	if (!et)
		return ENOENT;

	/*
	 * ok, we know what we want to remove and have signalled there
	 * actually are men at work.  first, unregister from rumpblk
	 */
	if (et->et_blkmin != -1) {
		rv = rumpblk_deregister(et->et_rn->rn_hostpath);
	} else {
		rv = 0;
	}
	KASSERT(rv == 0);

	/* then do the actual removal */
	mutex_enter(&etfs_lock);
	LIST_REMOVE(et, et_entries);
	mutex_exit(&etfs_lock);

	/* node is unreachable, safe to nuke all device copies */
	if (et->et_blkmin != -1)
		vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);

	if (et->et_rn->rn_hostpath != NULL)
		free(et->et_rn->rn_hostpath, M_TEMP);
	kmem_free(et->et_rn, sizeof(*et->et_rn));
	kmem_free(et, sizeof(*et));

	return 0;
}

/*
 * rumpfs
 */

#define INO_WHITEOUT 1
static int lastino = 2;
static kmutex_t reclock;

static struct rumpfs_node *
makeprivate(enum vtype vt, dev_t rdev, off_t size)
{
	struct rumpfs_node *rn;
	struct vattr *va;
	struct timespec ts;

	rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);

	switch (vt) {
	case VDIR:
		LIST_INIT(&rn->rn_dir);
		break;
	case VREG:
		rn->rn_readfd = -1;
		rn->rn_writefd = -1;
		break;
	default:
		break;
	}

	nanotime(&ts);

	va = &rn->rn_va;
	va->va_type = vt;
	va->va_mode = 0755;
	if (vt == VDIR)
		va->va_nlink = 2;
	else
		va->va_nlink = 1;
	va->va_uid = 0;
	va->va_gid = 0;
	va->va_fsid =
	va->va_fileid = atomic_inc_uint_nv(&lastino);
	va->va_size = size;
	va->va_blocksize = 512;
	va->va_atime = ts;
	va->va_mtime = ts;
	va->va_ctime = ts;
	va->va_birthtime = ts;
	va->va_gen = 0;
	va->va_flags = 0;
	va->va_rdev = rdev;
	va->va_bytes = 512;
	va->va_filerev = 0;
	va->va_vaflags = 0;

	return rn;
}

static int
makevnode(struct mount *mp, struct rumpfs_node *rn, struct vnode **vpp)
{
	struct vnode *vp;
	int (**vpops)(void *);
	struct vattr *va = &rn->rn_va;
	int rv;

	KASSERT(!mutex_owned(&reclock));

	if (va->va_type == VCHR || va->va_type == VBLK) {
		vpops = rump_specop_p;
	} else {
		vpops = rump_vnodeop_p;
	}
	if (vpops != rump_specop_p && va->va_type != VDIR
	    && !(va->va_type == VREG && rn->rn_hostpath != NULL)
	    && va->va_type != VSOCK && va->va_type != VLNK)
		return EOPNOTSUPP;

	rv = getnewvnode(VT_RUMP, mp, vpops, &vp);
	if (rv)
		return rv;

	vp->v_size = vp->v_writesize = va->va_size;
	vp->v_type = va->va_type;

	if (vpops == rump_specop_p) {
		spec_node_init(vp, va->va_rdev);
	}
	vp->v_data = rn;

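	/*
	 * Lock the fresh vnode and publish it in the node.  rn_vp is
	 * read under reclock by lookup and cleared under it by reclaim.
	 */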
	vn_lock(vp, LK_RETRY | LK_EXCLUSIVE);
	mutex_enter(&reclock);
	rn->rn_vp = vp;
	mutex_exit(&reclock);

	*vpp = vp;

	return 0;
}


static void
makedir(struct rumpfs_node *rnd,
	struct componentname *cnp, struct rumpfs_node *rn)
{
	struct rumpfs_dent *rdent;

	rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
	rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
	rdent->rd_node = rn;
	strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
	rdent->rd_namelen = strlen(rdent->rd_name);

	LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
}

static void
freedir(struct rumpfs_node *rnd, struct componentname *cnp)
{
	struct rumpfs_dent *rd = NULL;

	LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
		if (rd->rd_namelen == cnp->cn_namelen &&
		    strncmp(rd->rd_name, cnp->cn_nameptr,
		      cnp->cn_namelen) == 0)
			break;
	}
	if (rd == NULL)
		panic("could not find directory entry: %s", cnp->cn_nameptr);

	LIST_REMOVE(rd, rd_entries);
	kmem_free(rd->rd_name, rd->rd_namelen+1);
	kmem_free(rd, sizeof(*rd));
}

/*
 * Simple lookup for rump file systems.
 *
 * uhm, this is twisted.  C F C C, hope of C C F C looming
 */
static int
rump_vop_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	}; */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct vnode *vp;
	struct rumpfs_node *rnd = dvp->v_data, *rn;
	struct rumpfs_dent *rd = NULL;
	struct etfs *et;
	bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
	int rv = 0;

	/* check for dot, return directly if the case */
	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		vref(dvp);
		*vpp = dvp;
		return 0;
	}

	/* we handle only some "non-special" cases */
	if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
		return EOPNOTSUPP;

	/* check for etfs */
	if (dvp == rootvnode && cnp->cn_nameiop == LOOKUP) {
		bool found;
		mutex_enter(&etfs_lock);
		found = etfs_find(cnp->cn_pnbuf, &et, false);
		mutex_exit(&etfs_lock);

		if (found) {
			char *offset;

			offset = strstr(cnp->cn_pnbuf, et->et_key);
			KASSERT(offset);

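			/*
			 * The etfs key may span several path components;
			 * add the leftover bytes to cn_consume so namei
			 * does not try to look them up one by one.
			 */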
			rn = et->et_rn;
			cnp->cn_consume += et->et_keylen
			    - (cnp->cn_nameptr - offset) - cnp->cn_namelen;
			if (rn->rn_va.va_type != VDIR)
				cnp->cn_flags &= ~REQUIREDIR;
			goto getvnode;
		}
	}

	if (rnd->rn_flags & RUMPNODE_DIR_ET) {
		uint64_t fsize;
		char *newpath;
		size_t newpathlen;
		int hft, error;

		if (dotdot)
			return EOPNOTSUPP;

		newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
		newpath = malloc(newpathlen, M_TEMP, M_WAITOK);

		strlcpy(newpath, rnd->rn_hostpath, newpathlen);
		strlcat(newpath, "/", newpathlen);
		strlcat(newpath, cnp->cn_nameptr, newpathlen);

		if (rumpuser_getfileinfo(newpath, &fsize, &hft, &error)) {
			free(newpath, M_TEMP);
			return error;
		}

		/* allow only dirs and regular files */
		if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
			free(newpath, M_TEMP);
			return ENOENT;
		}

		rn = makeprivate(hft_to_vtype(hft), NODEV, fsize);
		rn->rn_flags |= RUMPNODE_CANRECLAIM;
		if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
			rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
		}
		rn->rn_hostpath = newpath;

		goto getvnode;
	} else {
		if (dotdot) {
			rn = rnd->rn_parent;
			goto getvnode;
		} else {
			LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
				if (rd->rd_namelen == cnp->cn_namelen &&
				    strncmp(rd->rd_name, cnp->cn_nameptr,
				      cnp->cn_namelen) == 0)
					break;
			}
		}
	}

	if (!rd && ((cnp->cn_flags & ISLASTCN) == 0 || cnp->cn_nameiop != CREATE))
		return ENOENT;

	if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
		cnp->cn_flags |= SAVENAME;
		return EJUSTRETURN;
	}
	if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == DELETE)
		cnp->cn_flags |= SAVENAME;

	rn = rd->rd_node;

 getvnode:
	KASSERT(rn);
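	/*
	 * ".." needs the parent's vnode while the child (dvp) is still
	 * locked; drop dvp's lock around getting it so the parent is
	 * never locked on top of the child, and relock afterwards.
	 */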
	if (dotdot)
		VOP_UNLOCK(dvp);
	mutex_enter(&reclock);
	if ((vp = rn->rn_vp)) {
		mutex_enter(&vp->v_interlock);
		mutex_exit(&reclock);
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
			goto getvnode;
		}
		*vpp = vp;
	} else {
		mutex_exit(&reclock);
		rv = makevnode(dvp->v_mount, rn, vpp);
	}
	if (dotdot)
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);

	return rv;
}

static int
rump_vop_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct rumpfs_node *rn = ap->a_vp->v_data;

	memcpy(ap->a_vap, &rn->rn_va, sizeof(struct vattr));
	return 0;
}

static int
rump_vop_mkdir(void *v)
{
	struct vop_mkdir_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	}; */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct rumpfs_node *rnd = dvp->v_data, *rn;
	int rv = 0;

	rn = makeprivate(VDIR, NODEV, DEV_BSIZE);
	rn->rn_parent = rnd;
	rv = makevnode(dvp->v_mount, rn, vpp);
	if (rv)
		goto out;

	makedir(rnd, cnp, rn);

 out:
	PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);
	return rv;
}

static int
rump_vop_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	}; */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct componentname *cnp = ap->a_cnp;
	struct rumpfs_node *rnd = dvp->v_data;
	struct rumpfs_node *rn = vp->v_data;
	int rv = 0;

	if (!LIST_EMPTY(&rn->rn_dir)) {
		rv = ENOTEMPTY;
		goto out;
	}

	freedir(rnd, cnp);
	rn->rn_flags |= RUMPNODE_CANRECLAIM;

 out:
	PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);
	vput(vp);

	return rv;
}

static int
rump_vop_mknod(void *v)
{
	struct vop_mknod_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	}; */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct vattr *va = ap->a_vap;
	struct rumpfs_node *rnd = dvp->v_data, *rn;
	int rv;

	rn = makeprivate(va->va_type, va->va_rdev, DEV_BSIZE);
	rv = makevnode(dvp->v_mount, rn, vpp);
	if (rv)
		goto out;

	makedir(rnd, cnp, rn);

 out:
	PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);
	return rv;
}

static int
rump_vop_create(void *v)
{
	struct vop_create_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	}; */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct vattr *va = ap->a_vap;
	struct rumpfs_node *rnd = dvp->v_data, *rn;
	int rv;

	if (va->va_type != VSOCK) {
		rv = EOPNOTSUPP;
		goto out;
	}
	rn = makeprivate(VSOCK, NODEV, DEV_BSIZE);
	rv = makevnode(dvp->v_mount, rn, vpp);
	if (rv)
		goto out;

	makedir(rnd, cnp, rn);

 out:
	PNBUF_PUT(cnp->cn_pnbuf);
	vput(dvp);
	return rv;
}

static int
rump_vop_symlink(void *v)
{
	struct vop_symlink_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	}; */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct rumpfs_node *rnd = dvp->v_data, *rn;
	const char *target = ap->a_target;
	size_t linklen;
	int rv;

	linklen = strlen(target);
	KASSERT(linklen < MAXPATHLEN);
	rn = makeprivate(VLNK, NODEV, linklen);
	rv = makevnode(dvp->v_mount, rn, vpp);
	if (rv)
		goto out;

	makedir(rnd, cnp, rn);

	KASSERT(linklen < MAXPATHLEN);
	rn->rn_linktarg = PNBUF_GET();
	rn->rn_linklen = linklen;
	strcpy(rn->rn_linktarg, target);

 out:
	vput(dvp);
	return rv;
}

static int
rump_vop_readlink(void *v)
{
	struct vop_readlink_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		kauth_cred_t a_cred;
	}; */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	struct uio *uio = ap->a_uio;

	return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
}

static int
rump_vop_whiteout(void *v)
{
	struct vop_whiteout_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
		int a_flags;
	} */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct rumpfs_node *rnd = dvp->v_data;
	struct componentname *cnp = ap->a_cnp;
	int flags = ap->a_flags;

	switch (flags) {
	case LOOKUP:
		break;
	case CREATE:
		makedir(rnd, cnp, RUMPFS_WHITEOUT);
		break;
	case DELETE:
		cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails? */
		freedir(rnd, cnp);
		break;
	default:
		panic("unknown whiteout op %d", flags);
	}

	return 0;
}

static int
rump_vop_open(void *v)
{
	struct vop_open_args /* {
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	int mode = ap->a_mode;
	int error = EINVAL;

	if (vp->v_type != VREG)
		return 0;

	if (mode & FREAD) {
		if (rn->rn_readfd != -1)
			return 0;
		rn->rn_readfd = rumpuser_open(rn->rn_hostpath,
		    O_RDONLY, &error);
	}

	if (mode & FWRITE) {
		if (rn->rn_writefd != -1)
			return 0;
		rn->rn_writefd = rumpuser_open(rn->rn_hostpath,
		    O_WRONLY, &error);
	}

	return error;
}
/* simple readdir.  even omits dotstuff and periods */
static int
rump_vop_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		kauth_cred_t a_cred;
		int *a_eofflag;
		off_t **a_cookies;
		int *a_ncookies;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct rumpfs_node *rnd = vp->v_data;
	struct rumpfs_dent *rdent;
	unsigned i;
	int rv = 0;

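	/* uio_offset is used as a directory entry index here, not a byte offset */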
	/* seek to current entry */
	for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
	    (i < uio->uio_offset) && rdent;
	    i++, rdent = LIST_NEXT(rdent, rd_entries))
		continue;
	if (!rdent)
		goto out;

	/* copy entries */
	for (; rdent && uio->uio_resid > 0;
	    rdent = LIST_NEXT(rdent, rd_entries), i++) {
		struct dirent dent;

		strlcpy(dent.d_name, rdent->rd_name, sizeof(dent.d_name));
		dent.d_namlen = strlen(dent.d_name);
		dent.d_reclen = _DIRENT_RECLEN(&dent, dent.d_namlen);

		if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
			dent.d_fileno = INO_WHITEOUT;
			dent.d_type = DT_WHT;
		} else {
			dent.d_fileno = rdent->rd_node->rn_va.va_fileid;
			dent.d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
		}

		if (uio->uio_resid < dent.d_reclen) {
			i--;
			break;
		}

		rv = uiomove(&dent, dent.d_reclen, uio);
		if (rv) {
			i--;
			break;
		}
	}

 out:
	if (ap->a_cookies) {
		*ap->a_ncookies = 0;
		*ap->a_cookies = NULL;
	}
	if (rdent)
		*ap->a_eofflag = 0;
	else
		*ap->a_eofflag = 1;
	uio->uio_offset = i;

	return rv;
}

static int
rump_vop_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	}; */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	struct uio *uio = ap->a_uio;
	uint8_t *buf;
	size_t bufsize;
	ssize_t n;
	int error = 0;

	bufsize = uio->uio_resid;
	buf = kmem_alloc(bufsize, KM_SLEEP);
	if ((n = rumpuser_pread(rn->rn_readfd, buf, bufsize,
	    uio->uio_offset + rn->rn_offset, &error)) == -1)
		goto out;
	KASSERT(n <= bufsize);
	error = uiomove(buf, n, uio);

 out:
	kmem_free(buf, bufsize);
	return error;
}

static int
rump_vop_write(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	}; */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	struct uio *uio = ap->a_uio;
	uint8_t *buf;
	size_t bufsize;
	ssize_t n;
	int error = 0;

	bufsize = uio->uio_resid;
	buf = kmem_alloc(bufsize, KM_SLEEP);
	error = uiomove(buf, bufsize, uio);
	if (error)
		goto out;
	KASSERT(uio->uio_resid == 0);
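	/*
	 * uiomove() already advanced uio_offset past the data, so back
	 * it out to get the original file offset for the host pwrite.
	 */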
	n = rumpuser_pwrite(rn->rn_writefd, buf, bufsize,
	    (uio->uio_offset - bufsize) + rn->rn_offset, &error);
	if (n >= 0) {
		KASSERT(n <= bufsize);
		uio->uio_resid = bufsize - n;
	}

 out:
	kmem_free(buf, bufsize);
	return error;
}

static int
rump_vop_success(void *v)
{

	return 0;
}

static int
rump_vop_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	int error;

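	/* drop the host file descriptors; rump_vop_open() reopens them on demand */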
	if (vp->v_type == VREG) {
		if (rn->rn_readfd != -1) {
			rumpuser_close(rn->rn_readfd, &error);
			rn->rn_readfd = -1;
		}
		if (rn->rn_writefd != -1) {
			rumpuser_close(rn->rn_writefd, &error);
			rn->rn_writefd = -1;
		}
	}
	*ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;

	VOP_UNLOCK(vp);
	return 0;
}

static int
rump_vop_reclaim(void *v)
{
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;

	mutex_enter(&reclock);
	rn->rn_vp = NULL;
	mutex_exit(&reclock);
	vp->v_data = NULL;

	if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
		if (vp->v_type == VLNK)
			PNBUF_PUT(rn->rn_linktarg);
		if (rn->rn_hostpath)
			free(rn->rn_hostpath, M_TEMP);
		kmem_free(rn, sizeof(*rn));
	}

	return 0;
}

static int
rump_vop_spec(void *v)
{
	struct vop_generic_args *ap = v;
	int (**opvec)(void *);

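	/* generic node management stays with rumpfs; everything else goes to specfs */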
	switch (ap->a_desc->vdesc_offset) {
	case VOP_ACCESS_DESCOFFSET:
	case VOP_GETATTR_DESCOFFSET:
	case VOP_LOCK_DESCOFFSET:
	case VOP_UNLOCK_DESCOFFSET:
	case VOP_RECLAIM_DESCOFFSET:
		opvec = rump_vnodeop_p;
		break;
	default:
		opvec = spec_vnodeop_p;
		break;
	}

	return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
}

/*
 * Begin vfs-level stuff
 */

VFS_PROTOS(rumpfs);
struct vfsops rumpfs_vfsops = {
	.vfs_name = MOUNT_RUMPFS,
	.vfs_min_mount_data = 0,
	.vfs_mount = rumpfs_mount,
	.vfs_start = (void *)nullop,
	.vfs_unmount = rumpfs_unmount,
	.vfs_root = rumpfs_root,
	.vfs_quotactl = (void *)eopnotsupp,
	.vfs_statvfs = genfs_statvfs,
	.vfs_sync = (void *)nullop,
	.vfs_vget = rumpfs_vget,
	.vfs_fhtovp = (void *)eopnotsupp,
	.vfs_vptofh = (void *)eopnotsupp,
	.vfs_init = rumpfs_init,
	.vfs_reinit = NULL,
	.vfs_done = rumpfs_done,
	.vfs_mountroot = rumpfs_mountroot,
	.vfs_snapshot = (void *)eopnotsupp,
	.vfs_extattrctl = (void *)eopnotsupp,
	.vfs_suspendctl = (void *)eopnotsupp,
	.vfs_opv_descs = rump_opv_descs,
	/* vfs_refcount */
	/* vfs_list */
};

int
rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
{

	return EOPNOTSUPP;
}

int
rumpfs_unmount(struct mount *mp, int flags)
{

	/* if going for it, just lie about it */
	if (panicstr)
		return 0;

	return EOPNOTSUPP; /* ;) */
}

int
rumpfs_root(struct mount *mp, struct vnode **vpp)
{
	struct rumpfs_mount *rfsmp = mp->mnt_data;

	vref(rfsmp->rfsmp_rvp);
	vn_lock(rfsmp->rfsmp_rvp, LK_EXCLUSIVE | LK_RETRY);
	*vpp = rfsmp->rfsmp_rvp;
	return 0;
}

int
rumpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{

	return EOPNOTSUPP;
}

void
rumpfs_init()
{

	CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);

	mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
}

void
rumpfs_done()
{

	mutex_destroy(&reclock);
	mutex_destroy(&etfs_lock);
}

int
rumpfs_mountroot()
{
	struct mount *mp;
	struct rumpfs_mount *rfsmp;
	struct rumpfs_node *rn;
	int error;

	if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
		vrele(rootvp);
		return error;
	}

	rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);

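	/* the root directory is its own parent, so ".." at the root resolves back to it */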
	rn = makeprivate(VDIR, NODEV, DEV_BSIZE);
	rn->rn_parent = rn;
	error = makevnode(mp, rn, &rfsmp->rfsmp_rvp);
	if (error)
		panic("could not create root vnode: %d", error);
	rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
	VOP_UNLOCK(rfsmp->rfsmp_rvp);

	mutex_enter(&mountlist_lock);
	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mutex_exit(&mountlist_lock);

	mp->mnt_data = rfsmp;
	mp->mnt_stat.f_namemax = MAXNAMLEN;
	mp->mnt_stat.f_iosize = 512;
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_iflag |= IMNT_MPSAFE;
	vfs_getnewfsid(mp);

	error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
	    mp->mnt_op->vfs_name, mp, curlwp);
	if (error)
		panic("set statvfsinfo for rootfs failed");

	vfs_unbusy(mp, false, NULL);

	return 0;
}