rumpfs.c revision 1.70 1 /* $NetBSD: rumpfs.c,v 1.70 2010/11/11 16:08:31 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.70 2010/11/11 16:08:31 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/atomic.h>
33 #include <sys/dirent.h>
34 #include <sys/errno.h>
35 #include <sys/filedesc.h>
36 #include <sys/fcntl.h>
37 #include <sys/kauth.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/mount.h>
41 #include <sys/namei.h>
42 #include <sys/lock.h>
43 #include <sys/lockf.h>
44 #include <sys/queue.h>
45 #include <sys/stat.h>
46 #include <sys/syscallargs.h>
47 #include <sys/vnode.h>
48 #include <sys/unistd.h>
49
50 #include <miscfs/fifofs/fifo.h>
51 #include <miscfs/specfs/specdev.h>
52 #include <miscfs/genfs/genfs.h>
53 #include <miscfs/genfs/genfs_node.h>
54
55 #include <rump/rumpuser.h>
56
57 #include "rump_private.h"
58 #include "rump_vfs_private.h"
59
60 static int rump_vop_lookup(void *);
61 static int rump_vop_getattr(void *);
62 static int rump_vop_mkdir(void *);
63 static int rump_vop_rmdir(void *);
64 static int rump_vop_mknod(void *);
65 static int rump_vop_create(void *);
66 static int rump_vop_inactive(void *);
67 static int rump_vop_reclaim(void *);
68 static int rump_vop_success(void *);
69 static int rump_vop_readdir(void *);
70 static int rump_vop_spec(void *);
71 static int rump_vop_read(void *);
72 static int rump_vop_write(void *);
73 static int rump_vop_open(void *);
74 static int rump_vop_symlink(void *);
75 static int rump_vop_readlink(void *);
76 static int rump_vop_whiteout(void *);
77 static int rump_vop_pathconf(void *);
78
/*
 * Vnode operations vector for fifos.  The table contains only the
 * default entry, which points at vn_default_error.
 */
int (**fifo_vnodeop_p)(void *);
const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ NULL, NULL }			/* table terminator */
};
const struct vnodeopv_desc fifo_vnodeop_opv_desc =
	{ &fifo_vnodeop_p, fifo_vnodeop_entries };
86
/*
 * Vnode operations vector for ordinary rumpfs nodes.  makevnode()
 * installs this vector on every vnode that is not a character or
 * block device.  Operations without an entry here go to the default
 * entry, vn_default_error.
 */
int (**rump_vnodeop_p)(void *);
const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, rump_vop_lookup },
	{ &vop_getattr_desc, rump_vop_getattr },
	{ &vop_mkdir_desc, rump_vop_mkdir },
	{ &vop_rmdir_desc, rump_vop_rmdir },
	{ &vop_mknod_desc, rump_vop_mknod },
	{ &vop_create_desc, rump_vop_create },
	{ &vop_symlink_desc, rump_vop_symlink },
	{ &vop_readlink_desc, rump_vop_readlink },
	/* access checks always succeed */
	{ &vop_access_desc, rump_vop_success },
	{ &vop_readdir_desc, rump_vop_readdir },
	{ &vop_read_desc, rump_vop_read },
	{ &vop_write_desc, rump_vop_write },
	{ &vop_open_desc, rump_vop_open },
	{ &vop_close_desc, genfs_nullop },
	{ &vop_seek_desc, genfs_seek },
	{ &vop_getpages_desc, genfs_getpages },
	{ &vop_putpages_desc, genfs_putpages },
	{ &vop_whiteout_desc, rump_vop_whiteout },
	/* fsync is a successful no-op */
	{ &vop_fsync_desc, rump_vop_success },
	{ &vop_lock_desc, genfs_lock },
	{ &vop_unlock_desc, genfs_unlock },
	{ &vop_islocked_desc, genfs_islocked },
	{ &vop_inactive_desc, rump_vop_inactive },
	{ &vop_reclaim_desc, rump_vop_reclaim },
	/* removing files and creating hard links is not supported */
	{ &vop_remove_desc, genfs_eopnotsupp },
	{ &vop_link_desc, genfs_eopnotsupp },
	{ &vop_pathconf_desc, rump_vop_pathconf },
	{ NULL, NULL }			/* table terminator */
};
const struct vnodeopv_desc rump_vnodeop_opv_desc =
	{ &rump_vnodeop_p, rump_vnodeop_entries };
121
/*
 * Vnode operations vector for device nodes (VCHR/VBLK).  The only
 * entry is the default one, so every operation enters rump_vop_spec(),
 * which picks the vector that actually services the call.
 */
int (**rump_specop_p)(void *);
const struct vnodeopv_entry_desc rump_specop_entries[] = {
	{ &vop_default_desc, rump_vop_spec },
	{ NULL, NULL }			/* table terminator */
};
const struct vnodeopv_desc rump_specop_opv_desc =
	{ &rump_specop_p, rump_specop_entries };
129
/*
 * NULL-terminated list of the operation vectors belonging to rumpfs;
 * referenced from rumpfs_vfsops.vfs_opv_descs.
 */
const struct vnodeopv_desc * const rump_opv_descs[] = {
	&rump_vnodeop_opv_desc,
	&rump_specop_opv_desc,
	NULL
};
135
/*
 * Directory entry.  A whiteout is stored as an entry whose rd_node is
 * RUMPFS_WHITEOUT, i.e. an entry without a backing node.
 */
#define RUMPFS_WHITEOUT NULL
#define RDENT_ISWHITEOUT(rdp) (rdp->rd_node == RUMPFS_WHITEOUT)
struct rumpfs_dent {
	char *rd_name;		/* NUL-terminated copy of the entry name */
	int rd_namelen;		/* strlen(rd_name) */
	struct rumpfs_node *rd_node; /* node named by entry, or whiteout */

	LIST_ENTRY(rumpfs_dent) rd_entries; /* link in parent's rn_dir */
};
145
/* genfs callbacks handed to genfs_node_init() for every rumpfs vnode */
struct genfs_ops rumpfs_genfsops = {
	.gop_size = genfs_size,
	.gop_write = genfs_gop_write,

	/* optional */
	.gop_alloc = NULL,
	.gop_markupdate = NULL,
};
154
/*
 * In-memory file system node.  The type-specific part lives in a union,
 * so the rn_* accessor macros below are only meaningful for a node of
 * the matching vnode type.
 */
struct rumpfs_node {
	struct genfs_node rn_gn;
	struct vattr rn_va;	/* attributes, returned as-is by getattr */
	struct vnode *rn_vp;	/* attached vnode or NULL; see reclock */
	char *rn_hostpath;	/* host file backing the node, if any */
	int rn_flags;		/* RUMPNODE_* */

	union {
		struct {		/* VREG */
			int readfd;	/* host fd for reading, -1 if none */
			int writefd;	/* host fd for writing, -1 if none */
			uint64_t offset; /* added to offsets of host I/O */
		} reg;
		struct {		/* VDIR */
			LIST_HEAD(, rumpfs_dent) dents;
			struct rumpfs_node *parent; /* target of ".." */
			int flags;
		} dir;
		struct {		/* symlink; set by rump_vop_symlink */
			char *target;	/* link contents */
			size_t len;	/* length of contents, without NUL */
		} link;
	} rn_u;
};
#define rn_readfd	rn_u.reg.readfd
#define rn_writefd	rn_u.reg.writefd
#define rn_offset	rn_u.reg.offset
#define rn_dir		rn_u.dir.dents
#define rn_parent	rn_u.dir.parent
#define rn_linktarg	rn_u.link.target
#define rn_linklen	rn_u.link.len

/* node memory is released when its vnode is reclaimed */
#define RUMPNODE_CANRECLAIM	0x01
/* directory whose lookups are answered from the host directory */
#define RUMPNODE_DIR_ET		0x02
/* nodes found under this directory inherit the two DIR_ET* flags */
#define RUMPNODE_DIR_ETSUBS	0x04
/* node was created by an etfs registration */
#define RUMPNODE_ET_PHONE_HOST	0x10
191
/* per-mount private data, stored in mp->mnt_data */
struct rumpfs_mount {
	struct vnode *rfsmp_rvp;	/* root vnode of the mount */
};

/* forward declaration: needed by the etfs code, defined further down */
static struct rumpfs_node *makeprivate(enum vtype, dev_t, off_t);
197
198 /*
199 * Extra Terrestrial stuff. We map a given key (pathname) to a file on
200 * the host FS. ET phones home only from the root node of rumpfs.
201 *
202 * When an etfs node is removed, a vnode potentially behind it is not
203 * immediately recycled.
204 */
205
/* one registered key -> host file mapping */
struct etfs {
	char et_key[MAXPATHLEN];	/* key (pathname), NUL-terminated */
	size_t et_keylen;		/* strlen(et_key) */
	bool et_prefixkey;		/* true for directory registrations */
	bool et_removing;		/* a removal is in progress */
	devminor_t et_blkmin;		/* rumpblk minor, -1 if none */

	LIST_ENTRY(etfs) et_entries;	/* link in etfs_list */

	struct rumpfs_node *et_rn;	/* node representing the mapping */
};
/* etfs_lock is held for all walks and modifications of etfs_list */
static kmutex_t etfs_lock;
static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);
219
220 static enum vtype
221 ettype_to_vtype(enum rump_etfs_type et)
222 {
223 enum vtype vt;
224
225 switch (et) {
226 case RUMP_ETFS_REG:
227 vt = VREG;
228 break;
229 case RUMP_ETFS_BLK:
230 vt = VBLK;
231 break;
232 case RUMP_ETFS_CHR:
233 vt = VCHR;
234 break;
235 case RUMP_ETFS_DIR:
236 vt = VDIR;
237 break;
238 case RUMP_ETFS_DIR_SUBDIRS:
239 vt = VDIR;
240 break;
241 default:
242 panic("invalid et type: %d", et);
243 }
244
245 return vt;
246 }
247
248 static enum vtype
249 hft_to_vtype(int hft)
250 {
251 enum vtype vt;
252
253 switch (hft) {
254 case RUMPUSER_FT_OTHER:
255 vt = VNON;
256 break;
257 case RUMPUSER_FT_DIR:
258 vt = VDIR;
259 break;
260 case RUMPUSER_FT_REG:
261 vt = VREG;
262 break;
263 case RUMPUSER_FT_BLK:
264 vt = VBLK;
265 break;
266 case RUMPUSER_FT_CHR:
267 vt = VCHR;
268 break;
269 default:
270 vt = VNON;
271 break;
272 }
273
274 return vt;
275 }
276
277 static bool
278 etfs_find(const char *key, struct etfs **etp, bool forceprefix)
279 {
280 struct etfs *et;
281 size_t keylen = strlen(key);
282
283 KASSERT(mutex_owned(&etfs_lock));
284
285 LIST_FOREACH(et, &etfs_list, et_entries) {
286 if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
287 && strncmp(key, et->et_key, et->et_keylen) == 0) {
288 if (etp)
289 *etp = et;
290 return true;
291 }
292 }
293
294 return false;
295 }
296
297 #define REGDIR(ftype) \
298 ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
299 static int
300 doregister(const char *key, const char *hostpath,
301 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
302 {
303 char buf[9];
304 struct etfs *et;
305 struct rumpfs_node *rn;
306 uint64_t fsize;
307 dev_t rdev = NODEV;
308 devminor_t dmin = -1;
309 int hft, error;
310
311 if (rumpuser_getfileinfo(hostpath, &fsize, &hft, &error))
312 return error;
313
314 /* etfs directory requires a directory on the host */
315 if (REGDIR(ftype)) {
316 if (hft != RUMPUSER_FT_DIR)
317 return ENOTDIR;
318 if (begin != 0)
319 return EISDIR;
320 if (size != RUMP_ETFS_SIZE_ENDOFF)
321 return EISDIR;
322 size = fsize;
323 } else {
324 if (begin > fsize)
325 return EINVAL;
326 if (size == RUMP_ETFS_SIZE_ENDOFF)
327 size = fsize - begin;
328 if (begin + size > fsize)
329 return EINVAL;
330 }
331
332 if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
333 error = rumpblk_register(hostpath, &dmin, begin, size);
334 if (error != 0) {
335 return error;
336 }
337 rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
338 }
339
340 et = kmem_alloc(sizeof(*et), KM_SLEEP);
341 strcpy(et->et_key, key);
342 et->et_keylen = strlen(et->et_key);
343 et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), rdev, size);
344 et->et_removing = false;
345 et->et_blkmin = dmin;
346
347 rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
348
349 if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
350 size_t len = strlen(hostpath)+1;
351
352 rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
353 memcpy(rn->rn_hostpath, hostpath, len);
354 rn->rn_offset = begin;
355 }
356
357 if (REGDIR(ftype)) {
358 rn->rn_flags |= RUMPNODE_DIR_ET;
359 et->et_prefixkey = true;
360 } else {
361 et->et_prefixkey = false;
362 }
363
364 if (ftype == RUMP_ETFS_DIR_SUBDIRS)
365 rn->rn_flags |= RUMPNODE_DIR_ETSUBS;
366
367 mutex_enter(&etfs_lock);
368 if (etfs_find(key, NULL, REGDIR(ftype))) {
369 mutex_exit(&etfs_lock);
370 if (et->et_blkmin != -1)
371 rumpblk_deregister(hostpath);
372 if (et->et_rn->rn_hostpath != NULL)
373 free(et->et_rn->rn_hostpath, M_TEMP);
374 kmem_free(et->et_rn, sizeof(*et->et_rn));
375 kmem_free(et, sizeof(*et));
376 return EEXIST;
377 }
378 LIST_INSERT_HEAD(&etfs_list, et, et_entries);
379 mutex_exit(&etfs_lock);
380
381 if (ftype == RUMP_ETFS_BLK) {
382 format_bytes(buf, sizeof(buf), size);
383 aprint_verbose("%s: hostpath %s (%s)\n", key, hostpath, buf);
384 }
385
386 return 0;
387 }
388 #undef REGDIR
389
390 int
391 rump_etfs_register(const char *key, const char *hostpath,
392 enum rump_etfs_type ftype)
393 {
394
395 return doregister(key, hostpath, ftype, 0, RUMP_ETFS_SIZE_ENDOFF);
396 }
397
398 int
399 rump_etfs_register_withsize(const char *key, const char *hostpath,
400 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
401 {
402
403 return doregister(key, hostpath, ftype, begin, size);
404 }
405
406 /* remove etfs mapping. caller's responsibility to make sure it's not in use */
407 int
408 rump_etfs_remove(const char *key)
409 {
410 struct etfs *et;
411 size_t keylen = strlen(key);
412 int rv;
413
414 mutex_enter(&etfs_lock);
415 LIST_FOREACH(et, &etfs_list, et_entries) {
416 if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
417 if (et->et_removing)
418 et = NULL;
419 else
420 et->et_removing = true;
421 break;
422 }
423 }
424 mutex_exit(&etfs_lock);
425 if (!et)
426 return ENOENT;
427
428 /*
429 * ok, we know what we want to remove and have signalled there
430 * actually are men at work. first, unregister from rumpblk
431 */
432 if (et->et_blkmin != -1) {
433 rv = rumpblk_deregister(et->et_rn->rn_hostpath);
434 } else {
435 rv = 0;
436 }
437 KASSERT(rv == 0);
438
439 /* then do the actual removal */
440 mutex_enter(&etfs_lock);
441 LIST_REMOVE(et, et_entries);
442 mutex_exit(&etfs_lock);
443
444 /* node is unreachable, safe to nuke all device copies */
445 if (et->et_blkmin != -1)
446 vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);
447
448 if (et->et_rn->rn_hostpath != NULL)
449 free(et->et_rn->rn_hostpath, M_TEMP);
450 kmem_free(et->et_rn, sizeof(*et->et_rn));
451 kmem_free(et, sizeof(*et));
452
453 return 0;
454 }
455
456 /*
457 * rumpfs
458 */
459
/* file number reported by readdir for whiteout entries */
#define INO_WHITEOUT 1
/*
 * Counter for file numbers; makeprivate() increments it atomically and
 * uses the new value.
 * NOTE(review): declared int but passed to atomic_inc_uint_nv() --
 * confirm the signedness mismatch is intended.
 */
static int lastino = 2;
/* held around every read and write of rumpfs_node rn_vp */
static kmutex_t reclock;
463
464 static struct rumpfs_node *
465 makeprivate(enum vtype vt, dev_t rdev, off_t size)
466 {
467 struct rumpfs_node *rn;
468 struct vattr *va;
469 struct timespec ts;
470
471 rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);
472
473 switch (vt) {
474 case VDIR:
475 LIST_INIT(&rn->rn_dir);
476 break;
477 case VREG:
478 rn->rn_readfd = -1;
479 rn->rn_writefd = -1;
480 break;
481 default:
482 break;
483 }
484
485 nanotime(&ts);
486
487 va = &rn->rn_va;
488 va->va_type = vt;
489 va->va_mode = 0755;
490 if (vt == VDIR)
491 va->va_nlink = 2;
492 else
493 va->va_nlink = 1;
494 va->va_uid = 0;
495 va->va_gid = 0;
496 va->va_fsid =
497 va->va_fileid = atomic_inc_uint_nv(&lastino);
498 va->va_size = size;
499 va->va_blocksize = 512;
500 va->va_atime = ts;
501 va->va_mtime = ts;
502 va->va_ctime = ts;
503 va->va_birthtime = ts;
504 va->va_gen = 0;
505 va->va_flags = 0;
506 va->va_rdev = rdev;
507 va->va_bytes = 512;
508 va->va_filerev = 0;
509 va->va_vaflags = 0;
510
511 return rn;
512 }
513
514 static int
515 makevnode(struct mount *mp, struct rumpfs_node *rn, struct vnode **vpp)
516 {
517 struct vnode *vp;
518 int (**vpops)(void *);
519 struct vattr *va = &rn->rn_va;
520 int rv;
521
522 KASSERT(!mutex_owned(&reclock));
523
524 if (va->va_type == VCHR || va->va_type == VBLK) {
525 vpops = rump_specop_p;
526 } else {
527 vpops = rump_vnodeop_p;
528 }
529
530 rv = getnewvnode(VT_RUMP, mp, vpops, &vp);
531 if (rv)
532 return rv;
533
534 vp->v_size = vp->v_writesize = va->va_size;
535 vp->v_type = va->va_type;
536
537 if (vpops == rump_specop_p) {
538 spec_node_init(vp, va->va_rdev);
539 }
540 vp->v_data = rn;
541
542 genfs_node_init(vp, &rumpfs_genfsops);
543 vn_lock(vp, LK_RETRY | LK_EXCLUSIVE);
544 mutex_enter(&reclock);
545 rn->rn_vp = vp;
546 mutex_exit(&reclock);
547
548 *vpp = vp;
549
550 return 0;
551 }
552
553
554 static void
555 makedir(struct rumpfs_node *rnd,
556 struct componentname *cnp, struct rumpfs_node *rn)
557 {
558 struct rumpfs_dent *rdent;
559
560 rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
561 rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
562 rdent->rd_node = rn;
563 strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
564 rdent->rd_namelen = strlen(rdent->rd_name);
565
566 LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
567 }
568
569 static void
570 freedir(struct rumpfs_node *rnd, struct componentname *cnp)
571 {
572 struct rumpfs_dent *rd = NULL;
573
574 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
575 if (rd->rd_namelen == cnp->cn_namelen &&
576 strncmp(rd->rd_name, cnp->cn_nameptr,
577 cnp->cn_namelen) == 0)
578 break;
579 }
580 if (rd == NULL)
581 panic("could not find directory entry: %s", cnp->cn_nameptr);
582
583 LIST_REMOVE(rd, rd_entries);
584 kmem_free(rd->rd_name, rd->rd_namelen+1);
585 kmem_free(rd, sizeof(*rd));
586 }
587
/*
 * Simple lookup for rump file systems.
 *
 * uhm, this is twisted.  C F C C, hope of C C F C looming
 *
 * The name is resolved from three sources, tried in this order:
 *  1) etfs mappings, only for LOOKUP operations starting at rootvnode
 *  2) the host directory, if the parent is an etfs directory
 *  3) the in-memory entry list of the parent
 *
 * Returns 0 with the result in *vpp, ENOENT if the name does not
 * exist, EJUSTRETURN if the name does not exist but is the last
 * component of a CREATE, EOPNOTSUPP for the unsupported cases, or an
 * error from the host / vnode allocation.
 */
static int
rump_vop_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	}; */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct vnode *vp;
	struct rumpfs_node *rnd = dvp->v_data, *rn;
	struct rumpfs_dent *rd = NULL;
	struct etfs *et;
	bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
	int rv = 0;

	/* check for dot, return directly if the case */
	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		vref(dvp);
		*vpp = dvp;
		return 0;
	}

	/*
	 * we handle only some "non-special" cases: a RENAME of the
	 * last component is refused
	 */
	if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
		return EOPNOTSUPP;

	/* check for etfs; the key is matched against the full path buffer */
	if (dvp == rootvnode && cnp->cn_nameiop == LOOKUP) {
		bool found;
		mutex_enter(&etfs_lock);
		found = etfs_find(cnp->cn_pnbuf, &et, false);
		mutex_exit(&etfs_lock);

		if (found) {
			char *offset;

			offset = strstr(cnp->cn_pnbuf, et->et_key);
			KASSERT(offset);

			rn = et->et_rn;
			/*
			 * The key may span several path components.
			 * Add the part of the key lying beyond the
			 * current component to cn_consume.
			 */
			cnp->cn_consume += et->et_keylen
			    - (cnp->cn_nameptr - offset) - cnp->cn_namelen;
			if (rn->rn_va.va_type != VDIR)
				cnp->cn_flags &= ~REQUIREDIR;
			goto getvnode;
		}
	}

	if (rnd->rn_flags & RUMPNODE_DIR_ET) {
		uint64_t fsize;
		char *newpath;
		size_t newpathlen;
		int hft, error;

		if (dotdot)
			return EOPNOTSUPP;

		/* room for "<hostpath>/<component>" plus the NUL */
		newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
		newpath = malloc(newpathlen, M_TEMP, M_WAITOK);

		/*
		 * newpathlen leaves room for exactly cn_namelen bytes of
		 * the name, so the final strlcat copies no more than the
		 * current component.
		 */
		strlcpy(newpath, rnd->rn_hostpath, newpathlen);
		strlcat(newpath, "/", newpathlen);
		strlcat(newpath, cnp->cn_nameptr, newpathlen);

		if (rumpuser_getfileinfo(newpath, &fsize, &hft, &error)) {
			free(newpath, M_TEMP);
			return error;
		}

		/* allow only dirs and regular files */
		if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
			free(newpath, M_TEMP);
			return ENOENT;
		}

		/*
		 * A fresh node is created for the host file.  It is
		 * flagged reclaimable, so it is freed together with
		 * newpath when its vnode is reclaimed.
		 */
		rn = makeprivate(hft_to_vtype(hft), NODEV, fsize);
		rn->rn_flags |= RUMPNODE_CANRECLAIM;
		if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
			rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
		}
		rn->rn_hostpath = newpath;

		goto getvnode;
	} else {
		if (dotdot) {
			rn = rnd->rn_parent;
			goto getvnode;
		} else {
			/* rd is left NULL if no entry matches */
			LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
				if (rd->rd_namelen == cnp->cn_namelen &&
				    strncmp(rd->rd_name, cnp->cn_nameptr,
				      cnp->cn_namelen) == 0)
					break;
			}
		}
	}

	if (!rd && ((cnp->cn_flags & ISLASTCN) == 0||cnp->cn_nameiop != CREATE))
		return ENOENT;

	/* name is free and caller wants to create it */
	if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
		cnp->cn_flags |= SAVENAME;
		return EJUSTRETURN;
	}
	if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == DELETE)
		cnp->cn_flags |= SAVENAME;

	/*
	 * NOTE(review): a whiteout entry has rd_node == RUMPFS_WHITEOUT
	 * (NULL), which would trip the KASSERT below -- confirm that
	 * whiteouts cannot be looked up this way.
	 */
	rn = rd->rd_node;

 getvnode:
	KASSERT(rn);
	/* for ".." the parent directory is unlocked first */
	if (dotdot)
		VOP_UNLOCK(dvp);
	mutex_enter(&reclock);
	if ((vp = rn->rn_vp)) {
		/* take the vnode interlock before letting go of reclock */
		mutex_enter(&vp->v_interlock);
		mutex_exit(&reclock);
		if (vget(vp, LK_EXCLUSIVE)) {
			/*
			 * vget failed: start over.
			 * NOTE(review): dvp is relocked here
			 * unconditionally, which pairs with the
			 * VOP_UNLOCK above only in the dotdot case --
			 * confirm the non-dotdot path.
			 */
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
			goto getvnode;
		}
		*vpp = vp;
	} else {
		/* node has no vnode at the moment, create one */
		mutex_exit(&reclock);
		rv = makevnode(dvp->v_mount, rn, vpp);
	}
	if (dotdot)
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);

	return rv;
}
727
728 static int
729 rump_vop_getattr(void *v)
730 {
731 struct vop_getattr_args /* {
732 struct vnode *a_vp;
733 struct vattr *a_vap;
734 kauth_cred_t a_cred;
735 } */ *ap = v;
736 struct rumpfs_node *rn = ap->a_vp->v_data;
737
738 memcpy(ap->a_vap, &rn->rn_va, sizeof(struct vattr));
739 return 0;
740 }
741
742 static int
743 rump_vop_mkdir(void *v)
744 {
745 struct vop_mkdir_args /* {
746 struct vnode *a_dvp;
747 struct vnode **a_vpp;
748 struct componentname *a_cnp;
749 struct vattr *a_vap;
750 }; */ *ap = v;
751 struct vnode *dvp = ap->a_dvp;
752 struct vnode **vpp = ap->a_vpp;
753 struct componentname *cnp = ap->a_cnp;
754 struct rumpfs_node *rnd = dvp->v_data, *rn;
755 int rv = 0;
756
757 rn = makeprivate(VDIR, NODEV, DEV_BSIZE);
758 rn->rn_parent = rnd;
759 rv = makevnode(dvp->v_mount, rn, vpp);
760 if (rv)
761 goto out;
762
763 makedir(rnd, cnp, rn);
764
765 out:
766 PNBUF_PUT(cnp->cn_pnbuf);
767 vput(dvp);
768 return rv;
769 }
770
771 static int
772 rump_vop_rmdir(void *v)
773 {
774 struct vop_rmdir_args /* {
775 struct vnode *a_dvp;
776 struct vnode *a_vp;
777 struct componentname *a_cnp;
778 }; */ *ap = v;
779 struct vnode *dvp = ap->a_dvp;
780 struct vnode *vp = ap->a_vp;
781 struct componentname *cnp = ap->a_cnp;
782 struct rumpfs_node *rnd = dvp->v_data;
783 struct rumpfs_node *rn = vp->v_data;
784 int rv = 0;
785
786 if (!LIST_EMPTY(&rn->rn_dir)) {
787 rv = ENOTEMPTY;
788 goto out;
789 }
790
791 freedir(rnd, cnp);
792 rn->rn_flags |= RUMPNODE_CANRECLAIM;
793
794 out:
795 PNBUF_PUT(cnp->cn_pnbuf);
796 vput(dvp);
797 vput(vp);
798
799 return rv;
800 }
801
802 static int
803 rump_vop_mknod(void *v)
804 {
805 struct vop_mknod_args /* {
806 struct vnode *a_dvp;
807 struct vnode **a_vpp;
808 struct componentname *a_cnp;
809 struct vattr *a_vap;
810 }; */ *ap = v;
811 struct vnode *dvp = ap->a_dvp;
812 struct vnode **vpp = ap->a_vpp;
813 struct componentname *cnp = ap->a_cnp;
814 struct vattr *va = ap->a_vap;
815 struct rumpfs_node *rnd = dvp->v_data, *rn;
816 int rv;
817
818 rn = makeprivate(va->va_type, va->va_rdev, DEV_BSIZE);
819 rv = makevnode(dvp->v_mount, rn, vpp);
820 if (rv)
821 goto out;
822
823 makedir(rnd, cnp, rn);
824
825 out:
826 PNBUF_PUT(cnp->cn_pnbuf);
827 vput(dvp);
828 return rv;
829 }
830
831 static int
832 rump_vop_create(void *v)
833 {
834 struct vop_create_args /* {
835 struct vnode *a_dvp;
836 struct vnode **a_vpp;
837 struct componentname *a_cnp;
838 struct vattr *a_vap;
839 }; */ *ap = v;
840 struct vnode *dvp = ap->a_dvp;
841 struct vnode **vpp = ap->a_vpp;
842 struct componentname *cnp = ap->a_cnp;
843 struct vattr *va = ap->a_vap;
844 struct rumpfs_node *rnd = dvp->v_data, *rn;
845 off_t newsize;
846 int rv;
847
848 newsize = va->va_type == VSOCK ? DEV_BSIZE : 0;
849 rn = makeprivate(va->va_type, NODEV, newsize);
850 rv = makevnode(dvp->v_mount, rn, vpp);
851 if (rv)
852 goto out;
853
854 makedir(rnd, cnp, rn);
855
856 out:
857 PNBUF_PUT(cnp->cn_pnbuf);
858 vput(dvp);
859 return rv;
860 }
861
862 static int
863 rump_vop_symlink(void *v)
864 {
865 struct vop_symlink_args /* {
866 struct vnode *a_dvp;
867 struct vnode **a_vpp;
868 struct componentname *a_cnp;
869 struct vattr *a_vap;
870 char *a_target;
871 }; */ *ap = v;
872 struct vnode *dvp = ap->a_dvp;
873 struct vnode **vpp = ap->a_vpp;
874 struct componentname *cnp = ap->a_cnp;
875 struct rumpfs_node *rnd = dvp->v_data, *rn;
876 const char *target = ap->a_target;
877 size_t linklen;
878 int rv;
879
880 linklen = strlen(target);
881 KASSERT(linklen < MAXPATHLEN);
882 rn = makeprivate(VLNK, NODEV, linklen);
883 rv = makevnode(dvp->v_mount, rn, vpp);
884 if (rv)
885 goto out;
886
887 makedir(rnd, cnp, rn);
888
889 KASSERT(linklen < MAXPATHLEN);
890 rn->rn_linktarg = PNBUF_GET();
891 rn->rn_linklen = linklen;
892 strcpy(rn->rn_linktarg, target);
893
894 out:
895 vput(dvp);
896 return rv;
897 }
898
899 static int
900 rump_vop_readlink(void *v)
901 {
902 struct vop_readlink_args /* {
903 struct vnode *a_vp;
904 struct uio *a_uio;
905 kauth_cred_t a_cred;
906 }; */ *ap = v;
907 struct vnode *vp = ap->a_vp;
908 struct rumpfs_node *rn = vp->v_data;
909 struct uio *uio = ap->a_uio;
910
911 return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
912 }
913
914 static int
915 rump_vop_whiteout(void *v)
916 {
917 struct vop_whiteout_args /* {
918 struct vnode *a_dvp;
919 struct componentname *a_cnp;
920 int a_flags;
921 } */ *ap = v;
922 struct vnode *dvp = ap->a_dvp;
923 struct rumpfs_node *rnd = dvp->v_data;
924 struct componentname *cnp = ap->a_cnp;
925 int flags = ap->a_flags;
926
927 switch (flags) {
928 case LOOKUP:
929 break;
930 case CREATE:
931 makedir(rnd, cnp, RUMPFS_WHITEOUT);
932 break;
933 case DELETE:
934 cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails ? */
935 freedir(rnd, cnp);
936 break;
937 default:
938 panic("unknown whiteout op %d", flags);
939 }
940
941 return 0;
942 }
943
944 static int
945 rump_vop_open(void *v)
946 {
947 struct vop_open_args /* {
948 struct vnode *a_vp;
949 int a_mode;
950 kauth_cred_t a_cred;
951 } */ *ap = v;
952 struct vnode *vp = ap->a_vp;
953 struct rumpfs_node *rn = vp->v_data;
954 int mode = ap->a_mode;
955 int error = EINVAL;
956
957 if (vp->v_type != VREG || (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0)
958 return 0;
959
960 if (mode & FREAD) {
961 if (rn->rn_readfd != -1)
962 return 0;
963 rn->rn_readfd = rumpuser_open(rn->rn_hostpath,
964 O_RDONLY, &error);
965 }
966
967 if (mode & FWRITE) {
968 if (rn->rn_writefd != -1)
969 return 0;
970 rn->rn_writefd = rumpuser_open(rn->rn_hostpath,
971 O_WRONLY, &error);
972 }
973
974 return error;
975 }
976
977 /* simple readdir. event omits dotstuff and periods */
978 static int
979 rump_vop_readdir(void *v)
980 {
981 struct vop_readdir_args /* {
982 struct vnode *a_vp;
983 struct uio *a_uio;
984 kauth_cred_t a_cred;
985 int *a_eofflag;
986 off_t **a_cookies;
987 int *a_ncookies;
988 } */ *ap = v;
989 struct vnode *vp = ap->a_vp;
990 struct uio *uio = ap->a_uio;
991 struct rumpfs_node *rnd = vp->v_data;
992 struct rumpfs_dent *rdent;
993 unsigned i;
994 int rv = 0;
995
996 /* seek to current entry */
997 for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
998 (i < uio->uio_offset) && rdent;
999 i++, rdent = LIST_NEXT(rdent, rd_entries))
1000 continue;
1001 if (!rdent)
1002 goto out;
1003
1004 /* copy entries */
1005 for (; rdent && uio->uio_resid > 0;
1006 rdent = LIST_NEXT(rdent, rd_entries), i++) {
1007 struct dirent dent;
1008
1009 strlcpy(dent.d_name, rdent->rd_name, sizeof(dent.d_name));
1010 dent.d_namlen = strlen(dent.d_name);
1011 dent.d_reclen = _DIRENT_RECLEN(&dent, dent.d_namlen);
1012
1013 if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
1014 dent.d_fileno = INO_WHITEOUT;
1015 dent.d_type = DT_WHT;
1016 } else {
1017 dent.d_fileno = rdent->rd_node->rn_va.va_fileid;
1018 dent.d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
1019 }
1020
1021 if (uio->uio_resid < dent.d_reclen) {
1022 i--;
1023 break;
1024 }
1025
1026 rv = uiomove(&dent, dent.d_reclen, uio);
1027 if (rv) {
1028 i--;
1029 break;
1030 }
1031 }
1032
1033 out:
1034 if (ap->a_cookies) {
1035 *ap->a_ncookies = 0;
1036 *ap->a_cookies = NULL;
1037 }
1038 if (rdent)
1039 *ap->a_eofflag = 0;
1040 else
1041 *ap->a_eofflag = 1;
1042 uio->uio_offset = i;
1043
1044 return rv;
1045 }
1046
1047 static int
1048 rump_vop_read(void *v)
1049 {
1050 struct vop_read_args /* {
1051 struct vnode *a_vp;
1052 struct uio *a_uio;
1053 int ioflags a_ioflag;
1054 kauth_cred_t a_cred;
1055 }; */ *ap = v;
1056 struct vnode *vp = ap->a_vp;
1057 struct rumpfs_node *rn = vp->v_data;
1058 struct uio *uio = ap->a_uio;
1059 uint8_t *buf;
1060 size_t bufsize;
1061 ssize_t n;
1062 int error = 0;
1063
1064 if (rn->rn_readfd == -1)
1065 return EOPNOTSUPP;
1066
1067 bufsize = uio->uio_resid;
1068 buf = kmem_alloc(bufsize, KM_SLEEP);
1069 if ((n = rumpuser_pread(rn->rn_readfd, buf, bufsize,
1070 uio->uio_offset + rn->rn_offset, &error)) == -1)
1071 goto out;
1072 KASSERT(n <= bufsize);
1073 error = uiomove(buf, n, uio);
1074
1075 out:
1076 kmem_free(buf, bufsize);
1077 return error;
1078 }
1079
1080 static int
1081 rump_vop_write(void *v)
1082 {
1083 struct vop_read_args /* {
1084 struct vnode *a_vp;
1085 struct uio *a_uio;
1086 int ioflags a_ioflag;
1087 kauth_cred_t a_cred;
1088 }; */ *ap = v;
1089 struct vnode *vp = ap->a_vp;
1090 struct rumpfs_node *rn = vp->v_data;
1091 struct uio *uio = ap->a_uio;
1092 uint8_t *buf;
1093 size_t bufsize;
1094 ssize_t n;
1095 int error = 0;
1096
1097 if (rn->rn_writefd == -1)
1098 return EOPNOTSUPP;
1099
1100 bufsize = uio->uio_resid;
1101 buf = kmem_alloc(bufsize, KM_SLEEP);
1102 error = uiomove(buf, bufsize, uio);
1103 if (error)
1104 goto out;
1105 KASSERT(uio->uio_resid == 0);
1106 n = rumpuser_pwrite(rn->rn_writefd, buf, bufsize,
1107 (uio->uio_offset-bufsize) + rn->rn_offset, &error);
1108 if (n >= 0) {
1109 KASSERT(n <= bufsize);
1110 uio->uio_resid = bufsize - n;
1111 }
1112
1113 out:
1114 kmem_free(buf, bufsize);
1115 return error;
1116 }
1117
1118 static int
1119 rump_vop_pathconf(void *v)
1120 {
1121 struct vop_pathconf_args /* {
1122 struct vnode *a_vp;
1123 int a_name;
1124 register_t *a_retval;
1125 }; */ *ap = v;
1126 int name = ap->a_name;
1127 register_t *retval = ap->a_retval;
1128
1129 switch (name) {
1130 case _PC_LINK_MAX:
1131 *retval = LINK_MAX;
1132 return 0;
1133 case _PC_NAME_MAX:
1134 *retval = NAME_MAX;
1135 return 0;
1136 case _PC_PATH_MAX:
1137 *retval = PATH_MAX;
1138 return 0;
1139 case _PC_PIPE_BUF:
1140 *retval = PIPE_BUF;
1141 return 0;
1142 case _PC_CHOWN_RESTRICTED:
1143 *retval = 1;
1144 return 0;
1145 case _PC_NO_TRUNC:
1146 *retval = 1;
1147 return 0;
1148 case _PC_SYNC_IO:
1149 *retval = 1;
1150 return 0;
1151 case _PC_FILESIZEBITS:
1152 *retval = 43; /* this one goes to 11 */
1153 return 0;
1154 case _PC_SYMLINK_MAX:
1155 *retval = MAXPATHLEN;
1156 return 0;
1157 case _PC_2_SYMLINKS:
1158 *retval = 1;
1159 return 0;
1160 default:
1161 return EINVAL;
1162 }
1163 }
1164
/*
 * Operation which does nothing and always succeeds.  The argument
 * structure is not examined.
 */
static int
rump_vop_success(void *v)
{
	const int success = 0;

	(void)v;
	return success;
}
1171
1172 static int
1173 rump_vop_inactive(void *v)
1174 {
1175 struct vop_inactive_args /* {
1176 struct vnode *a_vp;
1177 bool *a_recycle;
1178 } */ *ap = v;
1179 struct vnode *vp = ap->a_vp;
1180 struct rumpfs_node *rn = vp->v_data;
1181 int error;
1182
1183 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST && vp->v_type == VREG) {
1184 if (rn->rn_readfd != -1) {
1185 rumpuser_close(rn->rn_readfd, &error);
1186 rn->rn_readfd = -1;
1187 }
1188 if (rn->rn_writefd != -1) {
1189 rumpuser_close(rn->rn_writefd, &error);
1190 rn->rn_writefd = -1;
1191 }
1192 }
1193 *ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;
1194
1195 VOP_UNLOCK(vp);
1196 return 0;
1197 }
1198
1199 static int
1200 rump_vop_reclaim(void *v)
1201 {
1202 struct vop_reclaim_args /* {
1203 struct vnode *a_vp;
1204 } */ *ap = v;
1205 struct vnode *vp = ap->a_vp;
1206 struct rumpfs_node *rn = vp->v_data;
1207
1208 mutex_enter(&reclock);
1209 rn->rn_vp = NULL;
1210 mutex_exit(&reclock);
1211 genfs_node_destroy(vp);
1212 vp->v_data = NULL;
1213
1214 if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
1215 if (vp->v_type == VLNK)
1216 PNBUF_PUT(rn->rn_linktarg);
1217 if (rn->rn_hostpath)
1218 free(rn->rn_hostpath, M_TEMP);
1219 kmem_free(rn, sizeof(*rn));
1220 }
1221
1222 return 0;
1223 }
1224
1225 static int
1226 rump_vop_spec(void *v)
1227 {
1228 struct vop_generic_args *ap = v;
1229 int (**opvec)(void *);
1230
1231 switch (ap->a_desc->vdesc_offset) {
1232 case VOP_ACCESS_DESCOFFSET:
1233 case VOP_GETATTR_DESCOFFSET:
1234 case VOP_LOCK_DESCOFFSET:
1235 case VOP_UNLOCK_DESCOFFSET:
1236 case VOP_RECLAIM_DESCOFFSET:
1237 opvec = rump_vnodeop_p;
1238 break;
1239 default:
1240 opvec = spec_vnodeop_p;
1241 break;
1242 }
1243
1244 return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
1245 }
1246
1247 /*
1248 * Begin vfs-level stuff
1249 */
1250
/*
 * File system operations.  Only mount, unmount, root, vget, init,
 * done and mountroot are implemented in this file; statvfs and the
 * rename lock come from genfs and the remaining hooks are generic
 * no-op / EOPNOTSUPP stubs.
 */
VFS_PROTOS(rumpfs);
struct vfsops rumpfs_vfsops = {
	.vfs_name =		MOUNT_RUMPFS,
	.vfs_min_mount_data = 	0,
	.vfs_mount =		rumpfs_mount,
	.vfs_start =		(void *)nullop,
	.vfs_unmount = 		rumpfs_unmount,
	.vfs_root =		rumpfs_root,
	.vfs_quotactl =		(void *)eopnotsupp,
	.vfs_statvfs =		genfs_statvfs,
	.vfs_sync =		(void *)nullop,
	.vfs_vget =		rumpfs_vget,
	.vfs_fhtovp =		(void *)eopnotsupp,
	.vfs_vptofh =		(void *)eopnotsupp,
	.vfs_init =		rumpfs_init,
	.vfs_reinit =		NULL,
	.vfs_done =		rumpfs_done,
	.vfs_mountroot =	rumpfs_mountroot,
	.vfs_snapshot =		(void *)eopnotsupp,
	.vfs_extattrctl =	(void *)eopnotsupp,
	.vfs_suspendctl =	(void *)eopnotsupp,
	.vfs_renamelock_enter =	genfs_renamelock_enter,
	.vfs_renamelock_exit =	genfs_renamelock_exit,
	.vfs_opv_descs =	rump_opv_descs,
	/* vfs_refcount */
	/* vfs_list */
};
1278
1279 static int
1280 rumpfs_mountfs(struct mount *mp)
1281 {
1282 struct rumpfs_mount *rfsmp;
1283 struct rumpfs_node *rn;
1284 int error;
1285
1286 rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);
1287
1288 rn = makeprivate(VDIR, NODEV, DEV_BSIZE);
1289 rn->rn_parent = rn;
1290 if ((error = makevnode(mp, rn, &rfsmp->rfsmp_rvp)) != 0)
1291 return error;
1292
1293 rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
1294 VOP_UNLOCK(rfsmp->rfsmp_rvp);
1295
1296 mp->mnt_data = rfsmp;
1297 mp->mnt_stat.f_namemax = MAXNAMLEN;
1298 mp->mnt_stat.f_iosize = 512;
1299 mp->mnt_flag |= MNT_LOCAL;
1300 mp->mnt_iflag |= IMNT_MPSAFE;
1301 vfs_getnewfsid(mp);
1302
1303 return 0;
1304 }
1305
1306 int
1307 rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
1308 {
1309 int error;
1310
1311 error = set_statvfs_info(mntpath, UIO_USERSPACE, "rumpfs", UIO_SYSSPACE,
1312 mp->mnt_op->vfs_name, mp, curlwp);
1313 if (error)
1314 return error;
1315
1316 return rumpfs_mountfs(mp);
1317 }
1318
1319 int
1320 rumpfs_unmount(struct mount *mp, int mntflags)
1321 {
1322 struct rumpfs_mount *rfsmp = mp->mnt_data;
1323 int flags = 0, error;
1324
1325 if (panicstr || mntflags & MNT_FORCE)
1326 flags |= FORCECLOSE;
1327
1328 if ((error = vflush(mp, rfsmp->rfsmp_rvp, flags)) != 0)
1329 return error;
1330 vgone(rfsmp->rfsmp_rvp); /* XXX */
1331
1332 kmem_free(rfsmp, sizeof(*rfsmp));
1333
1334 return 0;
1335 }
1336
1337 int
1338 rumpfs_root(struct mount *mp, struct vnode **vpp)
1339 {
1340 struct rumpfs_mount *rfsmp = mp->mnt_data;
1341
1342 vref(rfsmp->rfsmp_rvp);
1343 vn_lock(rfsmp->rfsmp_rvp, LK_EXCLUSIVE | LK_RETRY);
1344 *vpp = rfsmp->rfsmp_rvp;
1345 return 0;
1346 }
1347
/*
 * Looking up a vnode by file number is not supported: always returns
 * EOPNOTSUPP and does not touch any of its arguments.
 */
int
rumpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	(void)mp;
	(void)ino;
	(void)vpp;

	return EOPNOTSUPP;
}
1354
1355 void
1356 rumpfs_init()
1357 {
1358
1359 CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);
1360
1361 mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
1362 mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
1363 }
1364
1365 void
1366 rumpfs_done()
1367 {
1368
1369 mutex_destroy(&reclock);
1370 mutex_destroy(&etfs_lock);
1371 }
1372
1373 int
1374 rumpfs_mountroot()
1375 {
1376 struct mount *mp;
1377 int error;
1378
1379 if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
1380 vrele(rootvp);
1381 return error;
1382 }
1383
1384 if ((error = rumpfs_mountfs(mp)) != 0)
1385 panic("mounting rootfs failed: %d", error);
1386
1387 mutex_enter(&mountlist_lock);
1388 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1389 mutex_exit(&mountlist_lock);
1390
1391 error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
1392 mp->mnt_op->vfs_name, mp, curlwp);
1393 if (error)
1394 panic("set_statvfs_info failed for rootfs: %d", error);
1395
1396 vfs_unbusy(mp, false, NULL);
1397
1398 return 0;
1399 }
1400