/*	$NetBSD: rumpfs.c,v 1.68 2010/11/11 15:05:54 pooka Exp $	*/

/*
 * Copyright (c) 2009  Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.68 2010/11/11 15:05:54 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/atomic.h>
33 #include <sys/dirent.h>
34 #include <sys/errno.h>
35 #include <sys/filedesc.h>
36 #include <sys/fcntl.h>
37 #include <sys/kauth.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/mount.h>
41 #include <sys/namei.h>
42 #include <sys/lock.h>
43 #include <sys/lockf.h>
44 #include <sys/queue.h>
45 #include <sys/stat.h>
46 #include <sys/syscallargs.h>
47 #include <sys/vnode.h>
48
49 #include <miscfs/fifofs/fifo.h>
50 #include <miscfs/specfs/specdev.h>
51 #include <miscfs/genfs/genfs.h>
52 #include <miscfs/genfs/genfs_node.h>
53
54 #include <rump/rumpuser.h>
55
56 #include "rump_private.h"
57 #include "rump_vfs_private.h"
58
/*
 * Forward declarations of the vnode operations implemented in this file.
 * All of them use the generic "void *" argument convention and are
 * referenced from the operation tables below.
 */

/* name space operations */
static int rump_vop_lookup(void *);
static int rump_vop_mkdir(void *);
static int rump_vop_rmdir(void *);
static int rump_vop_mknod(void *);
static int rump_vop_create(void *);
static int rump_vop_symlink(void *);
static int rump_vop_readlink(void *);
static int rump_vop_whiteout(void *);
static int rump_vop_readdir(void *);

/* attribute and data operations */
static int rump_vop_getattr(void *);
static int rump_vop_open(void *);
static int rump_vop_read(void *);
static int rump_vop_write(void *);

/* vnode life cycle */
static int rump_vop_inactive(void *);
static int rump_vop_reclaim(void *);

/* helpers: unconditional success and device node dispatch */
static int rump_vop_success(void *);
static int rump_vop_spec(void *);
76
77 int (**fifo_vnodeop_p)(void *);
78 const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
79 { &vop_default_desc, vn_default_error },
80 { NULL, NULL }
81 };
82 const struct vnodeopv_desc fifo_vnodeop_opv_desc =
83 { &fifo_vnodeop_p, fifo_vnodeop_entries };
84
85 int (**rump_vnodeop_p)(void *);
86 const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
87 { &vop_default_desc, vn_default_error },
88 { &vop_lookup_desc, rump_vop_lookup },
89 { &vop_getattr_desc, rump_vop_getattr },
90 { &vop_mkdir_desc, rump_vop_mkdir },
91 { &vop_rmdir_desc, rump_vop_rmdir },
92 { &vop_mknod_desc, rump_vop_mknod },
93 { &vop_create_desc, rump_vop_create },
94 { &vop_symlink_desc, rump_vop_symlink },
95 { &vop_readlink_desc, rump_vop_readlink },
96 { &vop_access_desc, rump_vop_success },
97 { &vop_readdir_desc, rump_vop_readdir },
98 { &vop_read_desc, rump_vop_read },
99 { &vop_write_desc, rump_vop_write },
100 { &vop_open_desc, rump_vop_open },
101 { &vop_seek_desc, genfs_seek },
102 { &vop_getpages_desc, genfs_getpages },
103 { &vop_putpages_desc, genfs_putpages },
104 { &vop_whiteout_desc, rump_vop_whiteout },
105 { &vop_fsync_desc, rump_vop_success },
106 { &vop_lock_desc, genfs_lock },
107 { &vop_unlock_desc, genfs_unlock },
108 { &vop_islocked_desc, genfs_islocked },
109 { &vop_inactive_desc, rump_vop_inactive },
110 { &vop_reclaim_desc, rump_vop_reclaim },
111 { &vop_remove_desc, genfs_eopnotsupp },
112 { &vop_link_desc, genfs_eopnotsupp },
113 { NULL, NULL }
114 };
115 const struct vnodeopv_desc rump_vnodeop_opv_desc =
116 { &rump_vnodeop_p, rump_vnodeop_entries };
117
118 int (**rump_specop_p)(void *);
119 const struct vnodeopv_entry_desc rump_specop_entries[] = {
120 { &vop_default_desc, rump_vop_spec },
121 { NULL, NULL }
122 };
123 const struct vnodeopv_desc rump_specop_opv_desc =
124 { &rump_specop_p, rump_specop_entries };
125
126 const struct vnodeopv_desc * const rump_opv_descs[] = {
127 &rump_vnodeop_opv_desc,
128 &rump_specop_opv_desc,
129 NULL
130 };
131
132 #define RUMPFS_WHITEOUT NULL
133 #define RDENT_ISWHITEOUT(rdp) (rdp->rd_node == RUMPFS_WHITEOUT)
134 struct rumpfs_dent {
135 char *rd_name;
136 int rd_namelen;
137 struct rumpfs_node *rd_node;
138
139 LIST_ENTRY(rumpfs_dent) rd_entries;
140 };
141
142 struct genfs_ops rumpfs_genfsops = {
143 .gop_size = genfs_size,
144 .gop_write = genfs_gop_write,
145
146 /* optional */
147 .gop_alloc = NULL,
148 .gop_markupdate = NULL,
149 };
150
151 struct rumpfs_node {
152 struct genfs_node rn_gn;
153 struct vattr rn_va;
154 struct vnode *rn_vp;
155 char *rn_hostpath;
156 int rn_flags;
157
158 union {
159 struct { /* VREG */
160 int readfd;
161 int writefd;
162 uint64_t offset;
163 } reg;
164 struct { /* VDIR */
165 LIST_HEAD(, rumpfs_dent) dents;
166 struct rumpfs_node *parent;
167 int flags;
168 } dir;
169 struct {
170 char *target;
171 size_t len;
172 } link;
173 } rn_u;
174 };
175 #define rn_readfd rn_u.reg.readfd
176 #define rn_writefd rn_u.reg.writefd
177 #define rn_offset rn_u.reg.offset
178 #define rn_dir rn_u.dir.dents
179 #define rn_parent rn_u.dir.parent
180 #define rn_linktarg rn_u.link.target
181 #define rn_linklen rn_u.link.len
182
183 #define RUMPNODE_CANRECLAIM 0x01
184 #define RUMPNODE_DIR_ET 0x02
185 #define RUMPNODE_DIR_ETSUBS 0x04
186
187 struct rumpfs_mount {
188 struct vnode *rfsmp_rvp;
189 };
190
191 static struct rumpfs_node *makeprivate(enum vtype, dev_t, off_t);
192
193 /*
194 * Extra Terrestrial stuff. We map a given key (pathname) to a file on
195 * the host FS. ET phones home only from the root node of rumpfs.
196 *
197 * When an etfs node is removed, a vnode potentially behind it is not
198 * immediately recycled.
199 */
200
201 struct etfs {
202 char et_key[MAXPATHLEN];
203 size_t et_keylen;
204 bool et_prefixkey;
205 bool et_removing;
206 devminor_t et_blkmin;
207
208 LIST_ENTRY(etfs) et_entries;
209
210 struct rumpfs_node *et_rn;
211 };
212 static kmutex_t etfs_lock;
213 static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);
214
215 static enum vtype
216 ettype_to_vtype(enum rump_etfs_type et)
217 {
218 enum vtype vt;
219
220 switch (et) {
221 case RUMP_ETFS_REG:
222 vt = VREG;
223 break;
224 case RUMP_ETFS_BLK:
225 vt = VBLK;
226 break;
227 case RUMP_ETFS_CHR:
228 vt = VCHR;
229 break;
230 case RUMP_ETFS_DIR:
231 vt = VDIR;
232 break;
233 case RUMP_ETFS_DIR_SUBDIRS:
234 vt = VDIR;
235 break;
236 default:
237 panic("invalid et type: %d", et);
238 }
239
240 return vt;
241 }
242
243 static enum vtype
244 hft_to_vtype(int hft)
245 {
246 enum vtype vt;
247
248 switch (hft) {
249 case RUMPUSER_FT_OTHER:
250 vt = VNON;
251 break;
252 case RUMPUSER_FT_DIR:
253 vt = VDIR;
254 break;
255 case RUMPUSER_FT_REG:
256 vt = VREG;
257 break;
258 case RUMPUSER_FT_BLK:
259 vt = VBLK;
260 break;
261 case RUMPUSER_FT_CHR:
262 vt = VCHR;
263 break;
264 default:
265 vt = VNON;
266 break;
267 }
268
269 return vt;
270 }
271
272 static bool
273 etfs_find(const char *key, struct etfs **etp, bool forceprefix)
274 {
275 struct etfs *et;
276 size_t keylen = strlen(key);
277
278 KASSERT(mutex_owned(&etfs_lock));
279
280 LIST_FOREACH(et, &etfs_list, et_entries) {
281 if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
282 && strncmp(key, et->et_key, et->et_keylen) == 0) {
283 if (etp)
284 *etp = et;
285 return true;
286 }
287 }
288
289 return false;
290 }
291
292 #define REGDIR(ftype) \
293 ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
294 static int
295 doregister(const char *key, const char *hostpath,
296 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
297 {
298 char buf[9];
299 struct etfs *et;
300 struct rumpfs_node *rn;
301 uint64_t fsize;
302 dev_t rdev = NODEV;
303 devminor_t dmin = -1;
304 int hft, error;
305
306 if (rumpuser_getfileinfo(hostpath, &fsize, &hft, &error))
307 return error;
308
309 /* etfs directory requires a directory on the host */
310 if (REGDIR(ftype)) {
311 if (hft != RUMPUSER_FT_DIR)
312 return ENOTDIR;
313 if (begin != 0)
314 return EISDIR;
315 if (size != RUMP_ETFS_SIZE_ENDOFF)
316 return EISDIR;
317 size = fsize;
318 } else {
319 if (begin > fsize)
320 return EINVAL;
321 if (size == RUMP_ETFS_SIZE_ENDOFF)
322 size = fsize - begin;
323 if (begin + size > fsize)
324 return EINVAL;
325 }
326
327 if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
328 error = rumpblk_register(hostpath, &dmin, begin, size);
329 if (error != 0) {
330 return error;
331 }
332 rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
333 }
334
335 et = kmem_alloc(sizeof(*et), KM_SLEEP);
336 strcpy(et->et_key, key);
337 et->et_keylen = strlen(et->et_key);
338 et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), rdev, size);
339 et->et_removing = false;
340 et->et_blkmin = dmin;
341
342 if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
343 size_t len = strlen(hostpath)+1;
344
345 rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
346 memcpy(rn->rn_hostpath, hostpath, len);
347 rn->rn_offset = begin;
348 }
349
350 if (REGDIR(ftype)) {
351 rn->rn_flags |= RUMPNODE_DIR_ET;
352 et->et_prefixkey = true;
353 } else {
354 et->et_prefixkey = false;
355 }
356
357 if (ftype == RUMP_ETFS_DIR_SUBDIRS)
358 rn->rn_flags |= RUMPNODE_DIR_ETSUBS;
359
360 mutex_enter(&etfs_lock);
361 if (etfs_find(key, NULL, REGDIR(ftype))) {
362 mutex_exit(&etfs_lock);
363 if (et->et_blkmin != -1)
364 rumpblk_deregister(hostpath);
365 if (et->et_rn->rn_hostpath != NULL)
366 free(et->et_rn->rn_hostpath, M_TEMP);
367 kmem_free(et->et_rn, sizeof(*et->et_rn));
368 kmem_free(et, sizeof(*et));
369 return EEXIST;
370 }
371 LIST_INSERT_HEAD(&etfs_list, et, et_entries);
372 mutex_exit(&etfs_lock);
373
374 if (ftype == RUMP_ETFS_BLK) {
375 format_bytes(buf, sizeof(buf), size);
376 aprint_verbose("%s: hostpath %s (%s)\n", key, hostpath, buf);
377 }
378
379 return 0;
380 }
381 #undef REGDIR
382
383 int
384 rump_etfs_register(const char *key, const char *hostpath,
385 enum rump_etfs_type ftype)
386 {
387
388 return doregister(key, hostpath, ftype, 0, RUMP_ETFS_SIZE_ENDOFF);
389 }
390
391 int
392 rump_etfs_register_withsize(const char *key, const char *hostpath,
393 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
394 {
395
396 return doregister(key, hostpath, ftype, begin, size);
397 }
398
399 /* remove etfs mapping. caller's responsibility to make sure it's not in use */
400 int
401 rump_etfs_remove(const char *key)
402 {
403 struct etfs *et;
404 size_t keylen = strlen(key);
405 int rv;
406
407 mutex_enter(&etfs_lock);
408 LIST_FOREACH(et, &etfs_list, et_entries) {
409 if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
410 if (et->et_removing)
411 et = NULL;
412 else
413 et->et_removing = true;
414 break;
415 }
416 }
417 mutex_exit(&etfs_lock);
418 if (!et)
419 return ENOENT;
420
421 /*
422 * ok, we know what we want to remove and have signalled there
423 * actually are men at work. first, unregister from rumpblk
424 */
425 if (et->et_blkmin != -1) {
426 rv = rumpblk_deregister(et->et_rn->rn_hostpath);
427 } else {
428 rv = 0;
429 }
430 KASSERT(rv == 0);
431
432 /* then do the actual removal */
433 mutex_enter(&etfs_lock);
434 LIST_REMOVE(et, et_entries);
435 mutex_exit(&etfs_lock);
436
437 /* node is unreachable, safe to nuke all device copies */
438 if (et->et_blkmin != -1)
439 vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);
440
441 if (et->et_rn->rn_hostpath != NULL)
442 free(et->et_rn->rn_hostpath, M_TEMP);
443 kmem_free(et->et_rn, sizeof(*et->et_rn));
444 kmem_free(et, sizeof(*et));
445
446 return 0;
447 }
448
449 /*
450 * rumpfs
451 */
452
453 #define INO_WHITEOUT 1
454 static int lastino = 2;
455 static kmutex_t reclock;
456
457 static struct rumpfs_node *
458 makeprivate(enum vtype vt, dev_t rdev, off_t size)
459 {
460 struct rumpfs_node *rn;
461 struct vattr *va;
462 struct timespec ts;
463
464 rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);
465
466 switch (vt) {
467 case VDIR:
468 LIST_INIT(&rn->rn_dir);
469 break;
470 case VREG:
471 rn->rn_readfd = -1;
472 rn->rn_writefd = -1;
473 break;
474 default:
475 break;
476 }
477
478 nanotime(&ts);
479
480 va = &rn->rn_va;
481 va->va_type = vt;
482 va->va_mode = 0755;
483 if (vt == VDIR)
484 va->va_nlink = 2;
485 else
486 va->va_nlink = 1;
487 va->va_uid = 0;
488 va->va_gid = 0;
489 va->va_fsid =
490 va->va_fileid = atomic_inc_uint_nv(&lastino);
491 va->va_size = size;
492 va->va_blocksize = 512;
493 va->va_atime = ts;
494 va->va_mtime = ts;
495 va->va_ctime = ts;
496 va->va_birthtime = ts;
497 va->va_gen = 0;
498 va->va_flags = 0;
499 va->va_rdev = rdev;
500 va->va_bytes = 512;
501 va->va_filerev = 0;
502 va->va_vaflags = 0;
503
504 return rn;
505 }
506
507 static int
508 makevnode(struct mount *mp, struct rumpfs_node *rn, struct vnode **vpp)
509 {
510 struct vnode *vp;
511 int (**vpops)(void *);
512 struct vattr *va = &rn->rn_va;
513 int rv;
514
515 KASSERT(!mutex_owned(&reclock));
516
517 if (va->va_type == VCHR || va->va_type == VBLK) {
518 vpops = rump_specop_p;
519 } else {
520 vpops = rump_vnodeop_p;
521 }
522
523 rv = getnewvnode(VT_RUMP, mp, vpops, &vp);
524 if (rv)
525 return rv;
526
527 vp->v_size = vp->v_writesize = va->va_size;
528 vp->v_type = va->va_type;
529
530 if (vpops == rump_specop_p) {
531 spec_node_init(vp, va->va_rdev);
532 }
533 vp->v_data = rn;
534
535 genfs_node_init(vp, &rumpfs_genfsops);
536 vn_lock(vp, LK_RETRY | LK_EXCLUSIVE);
537 mutex_enter(&reclock);
538 rn->rn_vp = vp;
539 mutex_exit(&reclock);
540
541 *vpp = vp;
542
543 return 0;
544 }
545
546
547 static void
548 makedir(struct rumpfs_node *rnd,
549 struct componentname *cnp, struct rumpfs_node *rn)
550 {
551 struct rumpfs_dent *rdent;
552
553 rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
554 rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
555 rdent->rd_node = rn;
556 strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
557 rdent->rd_namelen = strlen(rdent->rd_name);
558
559 LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
560 }
561
562 static void
563 freedir(struct rumpfs_node *rnd, struct componentname *cnp)
564 {
565 struct rumpfs_dent *rd = NULL;
566
567 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
568 if (rd->rd_namelen == cnp->cn_namelen &&
569 strncmp(rd->rd_name, cnp->cn_nameptr,
570 cnp->cn_namelen) == 0)
571 break;
572 }
573 if (rd == NULL)
574 panic("could not find directory entry: %s", cnp->cn_nameptr);
575
576 LIST_REMOVE(rd, rd_entries);
577 kmem_free(rd->rd_name, rd->rd_namelen+1);
578 kmem_free(rd, sizeof(*rd));
579 }
580
581 /*
582 * Simple lookup for rump file systems.
583 *
584 * uhm, this is twisted. C F C C, hope of C C F C looming
585 */
586 static int
587 rump_vop_lookup(void *v)
588 {
589 struct vop_lookup_args /* {
590 struct vnode *a_dvp;
591 struct vnode **a_vpp;
592 struct componentname *a_cnp;
593 }; */ *ap = v;
594 struct componentname *cnp = ap->a_cnp;
595 struct vnode *dvp = ap->a_dvp;
596 struct vnode **vpp = ap->a_vpp;
597 struct vnode *vp;
598 struct rumpfs_node *rnd = dvp->v_data, *rn;
599 struct rumpfs_dent *rd = NULL;
600 struct etfs *et;
601 bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
602 int rv = 0;
603
604 /* check for dot, return directly if the case */
605 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
606 vref(dvp);
607 *vpp = dvp;
608 return 0;
609 }
610
611 /* we handle only some "non-special" cases */
612 if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
613 return EOPNOTSUPP;
614
615 /* check for etfs */
616 if (dvp == rootvnode && cnp->cn_nameiop == LOOKUP) {
617 bool found;
618 mutex_enter(&etfs_lock);
619 found = etfs_find(cnp->cn_pnbuf, &et, false);
620 mutex_exit(&etfs_lock);
621
622 if (found) {
623 char *offset;
624
625 offset = strstr(cnp->cn_pnbuf, et->et_key);
626 KASSERT(offset);
627
628 rn = et->et_rn;
629 cnp->cn_consume += et->et_keylen
630 - (cnp->cn_nameptr - offset) - cnp->cn_namelen;
631 if (rn->rn_va.va_type != VDIR)
632 cnp->cn_flags &= ~REQUIREDIR;
633 goto getvnode;
634 }
635 }
636
637 if (rnd->rn_flags & RUMPNODE_DIR_ET) {
638 uint64_t fsize;
639 char *newpath;
640 size_t newpathlen;
641 int hft, error;
642
643 if (dotdot)
644 return EOPNOTSUPP;
645
646 newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
647 newpath = malloc(newpathlen, M_TEMP, M_WAITOK);
648
649 strlcpy(newpath, rnd->rn_hostpath, newpathlen);
650 strlcat(newpath, "/", newpathlen);
651 strlcat(newpath, cnp->cn_nameptr, newpathlen);
652
653 if (rumpuser_getfileinfo(newpath, &fsize, &hft, &error)) {
654 free(newpath, M_TEMP);
655 return error;
656 }
657
658 /* allow only dirs and regular files */
659 if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
660 free(newpath, M_TEMP);
661 return ENOENT;
662 }
663
664 rn = makeprivate(hft_to_vtype(hft), NODEV, fsize);
665 rn->rn_flags |= RUMPNODE_CANRECLAIM;
666 if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
667 rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
668 }
669 rn->rn_hostpath = newpath;
670
671 goto getvnode;
672 } else {
673 if (dotdot) {
674 rn = rnd->rn_parent;
675 goto getvnode;
676 } else {
677 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
678 if (rd->rd_namelen == cnp->cn_namelen &&
679 strncmp(rd->rd_name, cnp->cn_nameptr,
680 cnp->cn_namelen) == 0)
681 break;
682 }
683 }
684 }
685
686 if (!rd && ((cnp->cn_flags & ISLASTCN) == 0||cnp->cn_nameiop != CREATE))
687 return ENOENT;
688
689 if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
690 cnp->cn_flags |= SAVENAME;
691 return EJUSTRETURN;
692 }
693 if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == DELETE)
694 cnp->cn_flags |= SAVENAME;
695
696 rn = rd->rd_node;
697
698 getvnode:
699 KASSERT(rn);
700 if (dotdot)
701 VOP_UNLOCK(dvp);
702 mutex_enter(&reclock);
703 if ((vp = rn->rn_vp)) {
704 mutex_enter(&vp->v_interlock);
705 mutex_exit(&reclock);
706 if (vget(vp, LK_EXCLUSIVE)) {
707 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
708 goto getvnode;
709 }
710 *vpp = vp;
711 } else {
712 mutex_exit(&reclock);
713 rv = makevnode(dvp->v_mount, rn, vpp);
714 }
715 if (dotdot)
716 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
717
718 return rv;
719 }
720
721 static int
722 rump_vop_getattr(void *v)
723 {
724 struct vop_getattr_args /* {
725 struct vnode *a_vp;
726 struct vattr *a_vap;
727 kauth_cred_t a_cred;
728 } */ *ap = v;
729 struct rumpfs_node *rn = ap->a_vp->v_data;
730
731 memcpy(ap->a_vap, &rn->rn_va, sizeof(struct vattr));
732 return 0;
733 }
734
735 static int
736 rump_vop_mkdir(void *v)
737 {
738 struct vop_mkdir_args /* {
739 struct vnode *a_dvp;
740 struct vnode **a_vpp;
741 struct componentname *a_cnp;
742 struct vattr *a_vap;
743 }; */ *ap = v;
744 struct vnode *dvp = ap->a_dvp;
745 struct vnode **vpp = ap->a_vpp;
746 struct componentname *cnp = ap->a_cnp;
747 struct rumpfs_node *rnd = dvp->v_data, *rn;
748 int rv = 0;
749
750 rn = makeprivate(VDIR, NODEV, DEV_BSIZE);
751 rn->rn_parent = rnd;
752 rv = makevnode(dvp->v_mount, rn, vpp);
753 if (rv)
754 goto out;
755
756 makedir(rnd, cnp, rn);
757
758 out:
759 PNBUF_PUT(cnp->cn_pnbuf);
760 vput(dvp);
761 return rv;
762 }
763
764 static int
765 rump_vop_rmdir(void *v)
766 {
767 struct vop_rmdir_args /* {
768 struct vnode *a_dvp;
769 struct vnode *a_vp;
770 struct componentname *a_cnp;
771 }; */ *ap = v;
772 struct vnode *dvp = ap->a_dvp;
773 struct vnode *vp = ap->a_vp;
774 struct componentname *cnp = ap->a_cnp;
775 struct rumpfs_node *rnd = dvp->v_data;
776 struct rumpfs_node *rn = vp->v_data;
777 int rv = 0;
778
779 if (!LIST_EMPTY(&rn->rn_dir)) {
780 rv = ENOTEMPTY;
781 goto out;
782 }
783
784 freedir(rnd, cnp);
785 rn->rn_flags |= RUMPNODE_CANRECLAIM;
786
787 out:
788 PNBUF_PUT(cnp->cn_pnbuf);
789 vput(dvp);
790 vput(vp);
791
792 return rv;
793 }
794
795 static int
796 rump_vop_mknod(void *v)
797 {
798 struct vop_mknod_args /* {
799 struct vnode *a_dvp;
800 struct vnode **a_vpp;
801 struct componentname *a_cnp;
802 struct vattr *a_vap;
803 }; */ *ap = v;
804 struct vnode *dvp = ap->a_dvp;
805 struct vnode **vpp = ap->a_vpp;
806 struct componentname *cnp = ap->a_cnp;
807 struct vattr *va = ap->a_vap;
808 struct rumpfs_node *rnd = dvp->v_data, *rn;
809 int rv;
810
811 rn = makeprivate(va->va_type, va->va_rdev, DEV_BSIZE);
812 rv = makevnode(dvp->v_mount, rn, vpp);
813 if (rv)
814 goto out;
815
816 makedir(rnd, cnp, rn);
817
818 out:
819 PNBUF_PUT(cnp->cn_pnbuf);
820 vput(dvp);
821 return rv;
822 }
823
824 static int
825 rump_vop_create(void *v)
826 {
827 struct vop_create_args /* {
828 struct vnode *a_dvp;
829 struct vnode **a_vpp;
830 struct componentname *a_cnp;
831 struct vattr *a_vap;
832 }; */ *ap = v;
833 struct vnode *dvp = ap->a_dvp;
834 struct vnode **vpp = ap->a_vpp;
835 struct componentname *cnp = ap->a_cnp;
836 struct vattr *va = ap->a_vap;
837 struct rumpfs_node *rnd = dvp->v_data, *rn;
838 off_t newsize;
839 int rv;
840
841 newsize = va->va_type == VSOCK ? DEV_BSIZE : 0;
842 rn = makeprivate(va->va_type, NODEV, newsize);
843 rv = makevnode(dvp->v_mount, rn, vpp);
844 if (rv)
845 goto out;
846
847 makedir(rnd, cnp, rn);
848
849 out:
850 PNBUF_PUT(cnp->cn_pnbuf);
851 vput(dvp);
852 return rv;
853 }
854
855 static int
856 rump_vop_symlink(void *v)
857 {
858 struct vop_symlink_args /* {
859 struct vnode *a_dvp;
860 struct vnode **a_vpp;
861 struct componentname *a_cnp;
862 struct vattr *a_vap;
863 char *a_target;
864 }; */ *ap = v;
865 struct vnode *dvp = ap->a_dvp;
866 struct vnode **vpp = ap->a_vpp;
867 struct componentname *cnp = ap->a_cnp;
868 struct rumpfs_node *rnd = dvp->v_data, *rn;
869 const char *target = ap->a_target;
870 size_t linklen;
871 int rv;
872
873 linklen = strlen(target);
874 KASSERT(linklen < MAXPATHLEN);
875 rn = makeprivate(VLNK, NODEV, linklen);
876 rv = makevnode(dvp->v_mount, rn, vpp);
877 if (rv)
878 goto out;
879
880 makedir(rnd, cnp, rn);
881
882 KASSERT(linklen < MAXPATHLEN);
883 rn->rn_linktarg = PNBUF_GET();
884 rn->rn_linklen = linklen;
885 strcpy(rn->rn_linktarg, target);
886
887 out:
888 vput(dvp);
889 return rv;
890 }
891
892 static int
893 rump_vop_readlink(void *v)
894 {
895 struct vop_readlink_args /* {
896 struct vnode *a_vp;
897 struct uio *a_uio;
898 kauth_cred_t a_cred;
899 }; */ *ap = v;
900 struct vnode *vp = ap->a_vp;
901 struct rumpfs_node *rn = vp->v_data;
902 struct uio *uio = ap->a_uio;
903
904 return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
905 }
906
907 static int
908 rump_vop_whiteout(void *v)
909 {
910 struct vop_whiteout_args /* {
911 struct vnode *a_dvp;
912 struct componentname *a_cnp;
913 int a_flags;
914 } */ *ap = v;
915 struct vnode *dvp = ap->a_dvp;
916 struct rumpfs_node *rnd = dvp->v_data;
917 struct componentname *cnp = ap->a_cnp;
918 int flags = ap->a_flags;
919
920 switch (flags) {
921 case LOOKUP:
922 break;
923 case CREATE:
924 makedir(rnd, cnp, RUMPFS_WHITEOUT);
925 break;
926 case DELETE:
927 cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails ? */
928 freedir(rnd, cnp);
929 break;
930 default:
931 panic("unknown whiteout op %d", flags);
932 }
933
934 return 0;
935 }
936
937 static int
938 rump_vop_open(void *v)
939 {
940 struct vop_open_args /* {
941 struct vnode *a_vp;
942 int a_mode;
943 kauth_cred_t a_cred;
944 } */ *ap = v;
945 struct vnode *vp = ap->a_vp;
946 struct rumpfs_node *rn = vp->v_data;
947 int mode = ap->a_mode;
948 int error = EINVAL;
949
950 if (vp->v_type != VREG || rn->rn_hostpath == NULL)
951 return 0;
952
953 if (mode & FREAD) {
954 if (rn->rn_readfd != -1)
955 return 0;
956 rn->rn_readfd = rumpuser_open(rn->rn_hostpath,
957 O_RDONLY, &error);
958 }
959
960 if (mode & FWRITE) {
961 if (rn->rn_writefd != -1)
962 return 0;
963 rn->rn_writefd = rumpuser_open(rn->rn_hostpath,
964 O_WRONLY, &error);
965 }
966
967 return error;
968 }
969
970 /* simple readdir. event omits dotstuff and periods */
971 static int
972 rump_vop_readdir(void *v)
973 {
974 struct vop_readdir_args /* {
975 struct vnode *a_vp;
976 struct uio *a_uio;
977 kauth_cred_t a_cred;
978 int *a_eofflag;
979 off_t **a_cookies;
980 int *a_ncookies;
981 } */ *ap = v;
982 struct vnode *vp = ap->a_vp;
983 struct uio *uio = ap->a_uio;
984 struct rumpfs_node *rnd = vp->v_data;
985 struct rumpfs_dent *rdent;
986 unsigned i;
987 int rv = 0;
988
989 /* seek to current entry */
990 for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
991 (i < uio->uio_offset) && rdent;
992 i++, rdent = LIST_NEXT(rdent, rd_entries))
993 continue;
994 if (!rdent)
995 goto out;
996
997 /* copy entries */
998 for (; rdent && uio->uio_resid > 0;
999 rdent = LIST_NEXT(rdent, rd_entries), i++) {
1000 struct dirent dent;
1001
1002 strlcpy(dent.d_name, rdent->rd_name, sizeof(dent.d_name));
1003 dent.d_namlen = strlen(dent.d_name);
1004 dent.d_reclen = _DIRENT_RECLEN(&dent, dent.d_namlen);
1005
1006 if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
1007 dent.d_fileno = INO_WHITEOUT;
1008 dent.d_type = DT_WHT;
1009 } else {
1010 dent.d_fileno = rdent->rd_node->rn_va.va_fileid;
1011 dent.d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
1012 }
1013
1014 if (uio->uio_resid < dent.d_reclen) {
1015 i--;
1016 break;
1017 }
1018
1019 rv = uiomove(&dent, dent.d_reclen, uio);
1020 if (rv) {
1021 i--;
1022 break;
1023 }
1024 }
1025
1026 out:
1027 if (ap->a_cookies) {
1028 *ap->a_ncookies = 0;
1029 *ap->a_cookies = NULL;
1030 }
1031 if (rdent)
1032 *ap->a_eofflag = 0;
1033 else
1034 *ap->a_eofflag = 1;
1035 uio->uio_offset = i;
1036
1037 return rv;
1038 }
1039
1040 static int
1041 rump_vop_read(void *v)
1042 {
1043 struct vop_read_args /* {
1044 struct vnode *a_vp;
1045 struct uio *a_uio;
1046 int ioflags a_ioflag;
1047 kauth_cred_t a_cred;
1048 }; */ *ap = v;
1049 struct vnode *vp = ap->a_vp;
1050 struct rumpfs_node *rn = vp->v_data;
1051 struct uio *uio = ap->a_uio;
1052 uint8_t *buf;
1053 size_t bufsize;
1054 ssize_t n;
1055 int error = 0;
1056
1057 if (rn->rn_readfd == -1)
1058 return EOPNOTSUPP;
1059
1060 bufsize = uio->uio_resid;
1061 buf = kmem_alloc(bufsize, KM_SLEEP);
1062 if ((n = rumpuser_pread(rn->rn_readfd, buf, bufsize,
1063 uio->uio_offset + rn->rn_offset, &error)) == -1)
1064 goto out;
1065 KASSERT(n <= bufsize);
1066 error = uiomove(buf, n, uio);
1067
1068 out:
1069 kmem_free(buf, bufsize);
1070 return error;
1071 }
1072
1073 static int
1074 rump_vop_write(void *v)
1075 {
1076 struct vop_read_args /* {
1077 struct vnode *a_vp;
1078 struct uio *a_uio;
1079 int ioflags a_ioflag;
1080 kauth_cred_t a_cred;
1081 }; */ *ap = v;
1082 struct vnode *vp = ap->a_vp;
1083 struct rumpfs_node *rn = vp->v_data;
1084 struct uio *uio = ap->a_uio;
1085 uint8_t *buf;
1086 size_t bufsize;
1087 ssize_t n;
1088 int error = 0;
1089
1090 if (rn->rn_writefd == -1)
1091 return EOPNOTSUPP;
1092
1093 bufsize = uio->uio_resid;
1094 buf = kmem_alloc(bufsize, KM_SLEEP);
1095 error = uiomove(buf, bufsize, uio);
1096 if (error)
1097 goto out;
1098 KASSERT(uio->uio_resid == 0);
1099 n = rumpuser_pwrite(rn->rn_writefd, buf, bufsize,
1100 (uio->uio_offset-bufsize) + rn->rn_offset, &error);
1101 if (n >= 0) {
1102 KASSERT(n <= bufsize);
1103 uio->uio_resid = bufsize - n;
1104 }
1105
1106 out:
1107 kmem_free(buf, bufsize);
1108 return error;
1109 }
1110
/*
 * Operation which ignores its arguments and reports success.
 * Used for access and fsync in the rumpfs operations vector.
 */
static int
rump_vop_success(void *v)
{
	const int success = 0;

	return success;
}
1117
1118 static int
1119 rump_vop_inactive(void *v)
1120 {
1121 struct vop_inactive_args /* {
1122 struct vnode *a_vp;
1123 bool *a_recycle;
1124 } */ *ap = v;
1125 struct vnode *vp = ap->a_vp;
1126 struct rumpfs_node *rn = vp->v_data;
1127 int error;
1128
1129 if (vp->v_type == VREG) {
1130 if (rn->rn_readfd != -1) {
1131 rumpuser_close(rn->rn_readfd, &error);
1132 rn->rn_readfd = -1;
1133 }
1134 if (rn->rn_writefd != -1) {
1135 rumpuser_close(rn->rn_writefd, &error);
1136 rn->rn_writefd = -1;
1137 }
1138 }
1139 *ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;
1140
1141 VOP_UNLOCK(vp);
1142 return 0;
1143 }
1144
1145 static int
1146 rump_vop_reclaim(void *v)
1147 {
1148 struct vop_reclaim_args /* {
1149 struct vnode *a_vp;
1150 } */ *ap = v;
1151 struct vnode *vp = ap->a_vp;
1152 struct rumpfs_node *rn = vp->v_data;
1153
1154 mutex_enter(&reclock);
1155 rn->rn_vp = NULL;
1156 mutex_exit(&reclock);
1157 genfs_node_destroy(vp);
1158 vp->v_data = NULL;
1159
1160 if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
1161 if (vp->v_type == VLNK)
1162 PNBUF_PUT(rn->rn_linktarg);
1163 if (rn->rn_hostpath)
1164 free(rn->rn_hostpath, M_TEMP);
1165 kmem_free(rn, sizeof(*rn));
1166 }
1167
1168 return 0;
1169 }
1170
1171 static int
1172 rump_vop_spec(void *v)
1173 {
1174 struct vop_generic_args *ap = v;
1175 int (**opvec)(void *);
1176
1177 switch (ap->a_desc->vdesc_offset) {
1178 case VOP_ACCESS_DESCOFFSET:
1179 case VOP_GETATTR_DESCOFFSET:
1180 case VOP_LOCK_DESCOFFSET:
1181 case VOP_UNLOCK_DESCOFFSET:
1182 case VOP_RECLAIM_DESCOFFSET:
1183 opvec = rump_vnodeop_p;
1184 break;
1185 default:
1186 opvec = spec_vnodeop_p;
1187 break;
1188 }
1189
1190 return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
1191 }
1192
1193 /*
1194 * Begin vfs-level stuff
1195 */
1196
1197 VFS_PROTOS(rumpfs);
1198 struct vfsops rumpfs_vfsops = {
1199 .vfs_name = MOUNT_RUMPFS,
1200 .vfs_min_mount_data = 0,
1201 .vfs_mount = rumpfs_mount,
1202 .vfs_start = (void *)nullop,
1203 .vfs_unmount = rumpfs_unmount,
1204 .vfs_root = rumpfs_root,
1205 .vfs_quotactl = (void *)eopnotsupp,
1206 .vfs_statvfs = genfs_statvfs,
1207 .vfs_sync = (void *)nullop,
1208 .vfs_vget = rumpfs_vget,
1209 .vfs_fhtovp = (void *)eopnotsupp,
1210 .vfs_vptofh = (void *)eopnotsupp,
1211 .vfs_init = rumpfs_init,
1212 .vfs_reinit = NULL,
1213 .vfs_done = rumpfs_done,
1214 .vfs_mountroot = rumpfs_mountroot,
1215 .vfs_snapshot = (void *)eopnotsupp,
1216 .vfs_extattrctl = (void *)eopnotsupp,
1217 .vfs_suspendctl = (void *)eopnotsupp,
1218 .vfs_renamelock_enter = genfs_renamelock_enter,
1219 .vfs_renamelock_exit = genfs_renamelock_exit,
1220 .vfs_opv_descs = rump_opv_descs,
1221 /* vfs_refcount */
1222 /* vfs_list */
1223 };
1224
1225 int
1226 rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
1227 {
1228
1229 return EOPNOTSUPP;
1230 }
1231
1232 int
1233 rumpfs_unmount(struct mount *mp, int flags)
1234 {
1235
1236 /* if going for it, just lie about it */
1237 if (panicstr)
1238 return 0;
1239
1240 return EOPNOTSUPP; /* ;) */
1241 }
1242
1243 int
1244 rumpfs_root(struct mount *mp, struct vnode **vpp)
1245 {
1246 struct rumpfs_mount *rfsmp = mp->mnt_data;
1247
1248 vref(rfsmp->rfsmp_rvp);
1249 vn_lock(rfsmp->rfsmp_rvp, LK_EXCLUSIVE | LK_RETRY);
1250 *vpp = rfsmp->rfsmp_rvp;
1251 return 0;
1252 }
1253
1254 int
1255 rumpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1256 {
1257
1258 return EOPNOTSUPP;
1259 }
1260
1261 void
1262 rumpfs_init()
1263 {
1264
1265 CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);
1266
1267 mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
1268 mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
1269 }
1270
1271 void
1272 rumpfs_done()
1273 {
1274
1275 mutex_destroy(&reclock);
1276 mutex_destroy(&etfs_lock);
1277 }
1278
1279 int
1280 rumpfs_mountroot()
1281 {
1282 struct mount *mp;
1283 struct rumpfs_mount *rfsmp;
1284 struct rumpfs_node *rn;
1285 int error;
1286
1287 if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
1288 vrele(rootvp);
1289 return error;
1290 }
1291
1292 rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);
1293
1294 rn = makeprivate(VDIR, NODEV, DEV_BSIZE);
1295 rn->rn_parent = rn;
1296 error = makevnode(mp, rn, &rfsmp->rfsmp_rvp);
1297 if (error)
1298 panic("could not create root vnode: %d", error);
1299 rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
1300 VOP_UNLOCK(rfsmp->rfsmp_rvp);
1301
1302 mutex_enter(&mountlist_lock);
1303 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1304 mutex_exit(&mountlist_lock);
1305
1306 mp->mnt_data = rfsmp;
1307 mp->mnt_stat.f_namemax = MAXNAMLEN;
1308 mp->mnt_stat.f_iosize = 512;
1309 mp->mnt_flag |= MNT_LOCAL;
1310 mp->mnt_iflag |= IMNT_MPSAFE;
1311 vfs_getnewfsid(mp);
1312
1313 error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
1314 mp->mnt_op->vfs_name, mp, curlwp);
1315 if (error)
1316 panic("set statvfsinfo for rootfs failed");
1317
1318 vfs_unbusy(mp, false, NULL);
1319
1320 return 0;
1321 }
1322