rumpfs.c revision 1.163 1 /* $NetBSD: rumpfs.c,v 1.163 2021/06/29 22:34:09 dholland Exp $ */
2
3 /*
4 * Copyright (c) 2009, 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.163 2021/06/29 22:34:09 dholland Exp $");
30
31 #include <sys/param.h>
32 #include <sys/atomic.h>
33 #include <sys/buf.h>
34 #include <sys/dirent.h>
35 #include <sys/errno.h>
36 #include <sys/filedesc.h>
37 #include <sys/fcntl.h>
38 #include <sys/kauth.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/lock.h>
44 #include <sys/lockf.h>
45 #include <sys/queue.h>
46 #include <sys/stat.h>
47 #include <sys/syscallargs.h>
48 #include <sys/vnode.h>
49 #include <sys/fstrans.h>
50 #include <sys/unistd.h>
51
52 #include <miscfs/specfs/specdev.h>
53 #include <miscfs/genfs/genfs.h>
54 #include <miscfs/genfs/genfs_node.h>
55
56 #include <uvm/uvm_extern.h>
57
58 #include <rump-sys/kern.h>
59 #include <rump-sys/vfs.h>
60
61 #include <rump/rumpfs.h>
62 #include <rump/rumpuser.h>
63
/*
 * Forward declarations for the rumpfs vnode operation implementations
 * wired into the vnodeop vector tables below.
 */
static int rump_vop_lookup(void *);
static int rump_vop_getattr(void *);
static int rump_vop_setattr(void *);
static int rump_vop_mkdir(void *);
static int rump_vop_rmdir(void *);
static int rump_vop_remove(void *);
static int rump_vop_mknod(void *);
static int rump_vop_create(void *);
static int rump_vop_inactive(void *);
static int rump_vop_reclaim(void *);
static int rump_vop_success(void *);
static int rump_vop_readdir(void *);
static int rump_vop_spec(void *);
static int rump_vop_read(void *);
static int rump_vop_write(void *);
static int rump_vop_open(void *);
static int rump_vop_symlink(void *);
static int rump_vop_readlink(void *);
static int rump_vop_whiteout(void *);
static int rump_vop_pathconf(void *);
static int rump_vop_bmap(void *);
static int rump_vop_strategy(void *);
static int rump_vop_advlock(void *);
static int rump_vop_access(void *);
static int rump_vop_fcntl(void *);
89
/*
 * Operation vector for regular rumpfs vnodes.  Operations without a
 * rumpfs-specific implementation fall through to the genfs defaults
 * (or to vn_default_error / genfs_eopnotsupp where unsupported).
 */
int (**rump_vnodeop_p)(void *);
const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_parsepath_desc, genfs_parsepath },
	{ &vop_lookup_desc, rump_vop_lookup },
	{ &vop_getattr_desc, rump_vop_getattr },
	{ &vop_setattr_desc, rump_vop_setattr },
	{ &vop_mkdir_desc, rump_vop_mkdir },
	{ &vop_rmdir_desc, rump_vop_rmdir },
	{ &vop_remove_desc, rump_vop_remove },
	{ &vop_mknod_desc, rump_vop_mknod },
	{ &vop_create_desc, rump_vop_create },
	{ &vop_symlink_desc, rump_vop_symlink },
	{ &vop_readlink_desc, rump_vop_readlink },
	{ &vop_access_desc, rump_vop_access },
	{ &vop_accessx_desc, genfs_accessx },
	{ &vop_readdir_desc, rump_vop_readdir },
	{ &vop_read_desc, rump_vop_read },
	{ &vop_write_desc, rump_vop_write },
	{ &vop_open_desc, rump_vop_open },
	{ &vop_close_desc, genfs_nullop },
	{ &vop_seek_desc, genfs_seek },
	{ &vop_getpages_desc, genfs_getpages },
	{ &vop_putpages_desc, genfs_putpages },
	{ &vop_whiteout_desc, rump_vop_whiteout },
	{ &vop_fsync_desc, rump_vop_success },	/* nothing to flush; in-core fs */
	{ &vop_lock_desc, genfs_lock },
	{ &vop_unlock_desc, genfs_unlock },
	{ &vop_islocked_desc, genfs_islocked },
	{ &vop_inactive_desc, rump_vop_inactive },
	{ &vop_reclaim_desc, rump_vop_reclaim },
	{ &vop_link_desc, genfs_eopnotsupp },	/* hard links unsupported */
	{ &vop_pathconf_desc, rump_vop_pathconf },
	{ &vop_bmap_desc, rump_vop_bmap },
	{ &vop_strategy_desc, rump_vop_strategy },
	{ &vop_advlock_desc, rump_vop_advlock },
	{ &vop_fcntl_desc, rump_vop_fcntl },
	{ NULL, NULL }
};
const struct vnodeopv_desc rump_vnodeop_opv_desc =
	{ &rump_vnodeop_p, rump_vnodeop_entries };
131
/*
 * Operation vector for special (device) vnodes: every operation is
 * dispatched through rump_vop_spec.
 */
int (**rump_specop_p)(void *);
const struct vnodeopv_entry_desc rump_specop_entries[] = {
	{ &vop_default_desc, rump_vop_spec },
	{ NULL, NULL }
};
const struct vnodeopv_desc rump_specop_opv_desc =
	{ &rump_specop_p, rump_specop_entries };

/* All vectors published by this file system, NULL-terminated. */
const struct vnodeopv_desc * const rump_opv_descs[] = {
	&rump_vnodeop_opv_desc,
	&rump_specop_opv_desc,
	NULL
};
145
/* Sentinel node pointer marking a whited-out directory entry. */
#define RUMPFS_WHITEOUT ((void *)-1)
#define RDENT_ISWHITEOUT(rdp) (rdp->rd_node == RUMPFS_WHITEOUT)
/*
 * In-core directory entry.  Directories are simple linked lists of
 * these; rd_node is either the target node or RUMPFS_WHITEOUT.
 */
struct rumpfs_dent {
	char *rd_name;			/* NUL-terminated entry name */
	int rd_namelen;			/* strlen(rd_name) */
	struct rumpfs_node *rd_node;	/* target node or RUMPFS_WHITEOUT */

	LIST_ENTRY(rumpfs_dent) rd_entries;
};
155
/* genfs glue: use the stock genfs size/pageout helpers. */
struct genfs_ops rumpfs_genfsops = {
	.gop_size = genfs_size,
	.gop_write = genfs_gop_write,
	.gop_putrange = genfs_gop_putrange,

	/* optional */
	.gop_alloc = NULL,
	.gop_markupdate = NULL,
};
165
/*
 * Per-vnode private data.  The rn_u union is keyed on the vnode type;
 * VREG additionally distinguishes host-backed (etfs, "reg") files from
 * purely in-memory ("reg_noet") ones.
 */
struct rumpfs_node {
	struct genfs_node rn_gn;	/* must be first: genfs glue */
	struct vattr rn_va;		/* cached attributes */
	struct vnode *rn_vp;		/* backpointer; guarded by reclock */
	char *rn_hostpath;		/* host path for etfs nodes, else NULL */
	int rn_flags;			/* RUMPNODE_* below */
	struct lockf *rn_lockf;		/* advisory lock state */

	union {
		struct { /* VREG, host-backed (etfs) */
			int readfd;	/* host fd for reads, -1 if unopened */
			int writefd;	/* host fd for writes, -1 if unopened */
			uint64_t offset;	/* window start in host file */
		} reg;
		struct { /* VREG, in-memory */
			void *data;	/* file contents */
			size_t dlen;	/* length of data */
		} reg_noet;
		struct { /* VDIR */
			LIST_HEAD(, rumpfs_dent) dents;	/* entries */
			struct rumpfs_node *parent;	/* ".." target */
			int flags;
		} dir;
		struct { /* VLNK */
			char *target;	/* link target (PNBUF) */
			size_t len;	/* strlen(target) */
		} link;
	} rn_u;
};
/* Convenience accessors for the union arms. */
#define rn_readfd	rn_u.reg.readfd
#define rn_writefd	rn_u.reg.writefd
#define rn_offset	rn_u.reg.offset
#define rn_data		rn_u.reg_noet.data
#define rn_dlen		rn_u.reg_noet.dlen
#define rn_dir		rn_u.dir.dents
#define rn_parent	rn_u.dir.parent
#define rn_linktarg	rn_u.link.target
#define rn_linklen	rn_u.link.len

#define RUMPNODE_CANRECLAIM	0x01	/* node may be freed at reclaim */
#define RUMPNODE_DIR_ET		0x02	/* dir is host-backed (etfs) */
#define RUMPNODE_DIR_ETSUBS	0x04	/* subdirs inherit etfs backing */
#define RUMPNODE_ET_PHONE_HOST	0x10	/* I/O goes to the host file */
#define RUMPNODE_EXTSTORAGE	0x20	/* rn_data not owned by rumpfs */

/* Per-mount data: just the root vnode. */
struct rumpfs_mount {
	struct vnode *rfsmp_rvp;
};
214
#define INO_WHITEOUT 1
/* Inode number allocator; bumped atomically in makeprivate(). */
static int lastino = 2;
/* Serializes rn_vp access against vnode reclaim. */
static kmutex_t reclock;

#define RUMPFS_DEFAULTMODE 0755
static void freedir(struct rumpfs_node *, struct componentname *);
static struct rumpfs_node *makeprivate(enum vtype, mode_t, dev_t, off_t, bool);
static void freeprivate(struct rumpfs_node *);
223
224 /*
225 * Extra Terrestrial stuff. We map a given key (pathname) to a file on
226 * the host FS. ET phones home only from the root node of rumpfs.
227 *
228 * When an etfs node is removed, a vnode potentially behind it is not
229 * immediately recycled.
230 */
231
/*
 * One registered etfs mapping: rump pathname key -> host file.
 * Protected by etfs_lock.
 */
struct etfs {
	char et_key[MAXPATHLEN];	/* rump-side pathname (no leading /) */
	size_t et_keylen;		/* strlen(et_key) */
	bool et_prefixkey;		/* match key as a path prefix (dirs) */
	bool et_removing;		/* removal in progress; blocks others */
	devminor_t et_blkmin;		/* rumpblk minor, or -1 if not a dev */

	LIST_ENTRY(etfs) et_entries;

	struct rumpfs_node *et_rn;	/* backing rumpfs node */
};
static kmutex_t etfs_lock;
static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);
245
246 static enum vtype
247 ettype_to_vtype(enum rump_etfs_type et)
248 {
249 enum vtype vt;
250
251 switch (et) {
252 case RUMP_ETFS_REG:
253 vt = VREG;
254 break;
255 case RUMP_ETFS_BLK:
256 vt = VBLK;
257 break;
258 case RUMP_ETFS_CHR:
259 vt = VCHR;
260 break;
261 case RUMP_ETFS_DIR:
262 vt = VDIR;
263 break;
264 case RUMP_ETFS_DIR_SUBDIRS:
265 vt = VDIR;
266 break;
267 default:
268 panic("invalid et type: %d", et);
269 }
270
271 return vt;
272 }
273
274 static enum vtype
275 hft_to_vtype(int hft)
276 {
277 enum vtype vt;
278
279 switch (hft) {
280 case RUMPUSER_FT_OTHER:
281 vt = VNON;
282 break;
283 case RUMPUSER_FT_DIR:
284 vt = VDIR;
285 break;
286 case RUMPUSER_FT_REG:
287 vt = VREG;
288 break;
289 case RUMPUSER_FT_BLK:
290 vt = VBLK;
291 break;
292 case RUMPUSER_FT_CHR:
293 vt = VCHR;
294 break;
295 default:
296 vt = VNON;
297 break;
298 }
299
300 return vt;
301 }
302
303 static bool
304 etfs_find(const char *key, struct etfs **etp, bool forceprefix)
305 {
306 struct etfs *et;
307 size_t keylen = strlen(key);
308
309 KASSERT(mutex_owned(&etfs_lock));
310
311 LIST_FOREACH(et, &etfs_list, et_entries) {
312 if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
313 && strncmp(key, et->et_key, et->et_keylen) == 0) {
314 if (etp)
315 *etp = et;
316 return true;
317 }
318 }
319
320 return false;
321 }
322
323 #define REGDIR(ftype) \
324 ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
325 static int
326 etfsregister(const char *key, const char *hostpath,
327 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
328 {
329 char buf[9];
330 struct etfs *et;
331 struct rumpfs_node *rn;
332 uint64_t fsize;
333 dev_t rdev = NODEV;
334 devminor_t dmin = -1;
335 int hft, error;
336
337 if (key[0] != '/') {
338 return EINVAL;
339 }
340 while (key[0] == '/') {
341 key++;
342 }
343
344 if ((error = rumpuser_getfileinfo(hostpath, &fsize, &hft)) != 0)
345 return error;
346
347 /* etfs directory requires a directory on the host */
348 if (REGDIR(ftype)) {
349 if (hft != RUMPUSER_FT_DIR)
350 return ENOTDIR;
351 if (begin != 0)
352 return EISDIR;
353 if (size != RUMP_ETFS_SIZE_ENDOFF)
354 return EISDIR;
355 size = fsize;
356 } else {
357 if (begin > fsize)
358 return EINVAL;
359 if (size == RUMP_ETFS_SIZE_ENDOFF)
360 size = fsize - begin;
361 if (begin + size > fsize)
362 return EINVAL;
363 }
364
365 if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
366 error = rumpblk_register(hostpath, &dmin, begin, size);
367 if (error != 0) {
368 return error;
369 }
370 rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
371 }
372
373 et = kmem_alloc(sizeof(*et), KM_SLEEP);
374 strcpy(et->et_key, key);
375 et->et_keylen = strlen(et->et_key);
376 et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), RUMPFS_DEFAULTMODE,
377 rdev, size, true);
378 et->et_removing = false;
379 et->et_blkmin = dmin;
380
381 rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
382
383 if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
384 size_t len = strlen(hostpath)+1;
385
386 rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
387 memcpy(rn->rn_hostpath, hostpath, len);
388 rn->rn_offset = begin;
389 }
390
391 if (REGDIR(ftype)) {
392 rn->rn_flags |= RUMPNODE_DIR_ET;
393 et->et_prefixkey = true;
394 } else {
395 et->et_prefixkey = false;
396 }
397
398 if (ftype == RUMP_ETFS_DIR_SUBDIRS)
399 rn->rn_flags |= RUMPNODE_DIR_ETSUBS;
400
401 mutex_enter(&etfs_lock);
402 if (etfs_find(key, NULL, REGDIR(ftype))) {
403 mutex_exit(&etfs_lock);
404 if (et->et_blkmin != -1)
405 rumpblk_deregister(hostpath);
406 if (et->et_rn->rn_hostpath != NULL)
407 free(et->et_rn->rn_hostpath, M_TEMP);
408 freeprivate(et->et_rn);
409 kmem_free(et, sizeof(*et));
410 return EEXIST;
411 }
412 LIST_INSERT_HEAD(&etfs_list, et, et_entries);
413 mutex_exit(&etfs_lock);
414
415 if (ftype == RUMP_ETFS_BLK) {
416 format_bytes(buf, sizeof(buf), size);
417 aprint_verbose("/%s: hostpath %s (%s)\n", key, hostpath, buf);
418 }
419
420 return 0;
421 }
422 #undef REGDIR
423
/* remove etfs mapping. caller's responsibility to make sure it's not in use */
static int
etfsremove(const char *key)
{
	struct etfs *et;
	size_t keylen;
	int rv __diagused;

	/* key must be absolute; strip leading slashes like etfsregister() */
	if (key[0] != '/') {
		return EINVAL;
	}
	while (key[0] == '/') {
		key++;
	}

	keylen = strlen(key);

	/*
	 * Phase 1: find the entry and claim it by setting et_removing,
	 * so concurrent removers of the same key get ENOENT.
	 */
	mutex_enter(&etfs_lock);
	LIST_FOREACH(et, &etfs_list, et_entries) {
		if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
			if (et->et_removing)
				et = NULL;	/* somebody else is on it */
			else
				et->et_removing = true;
			break;
		}
	}
	mutex_exit(&etfs_lock);
	if (!et)
		return ENOENT;

	/*
	 * ok, we know what we want to remove and have signalled there
	 * actually are men at work.  first, unregister from rumpblk
	 */
	if (et->et_blkmin != -1) {
		rv = rumpblk_deregister(et->et_rn->rn_hostpath);
	} else {
		rv = 0;
	}
	KASSERT(rv == 0);

	/* then do the actual removal */
	mutex_enter(&etfs_lock);
	LIST_REMOVE(et, et_entries);
	mutex_exit(&etfs_lock);

	/* node is unreachable, safe to nuke all device copies */
	if (et->et_blkmin != -1) {
		vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);
	} else {
		struct vnode *vp;
		struct mount *mp;
		struct rumpfs_node *rn;

		/* peek at the backing vnode under reclock (vs. reclaim) */
		mutex_enter(&reclock);
		if ((vp = et->et_rn->rn_vp) != NULL) {
			mp = vp->v_mount;
			rn = vp->v_data;
			KASSERT(rn == et->et_rn);
		} else {
			mp = NULL;
		}
		mutex_exit(&reclock);
		/* if a vnode exists, take a real reference and gone() it */
		if (mp && vcache_get(mp, &rn, sizeof(rn), &vp) == 0) {
			rv = vfs_suspend(mp, 0);
			KASSERT(rv == 0);
			vgone(vp);
			vfs_resume(mp);
		}
	}

	if (et->et_rn->rn_hostpath != NULL)
		free(et->et_rn->rn_hostpath, M_TEMP);
	freeprivate(et->et_rn);
	kmem_free(et, sizeof(*et));

	return 0;
}
503
504 /*
505 * rumpfs
506 */
507
/*
 * Allocate and initialize a fresh rumpfs node of type vt with the
 * given mode, rdev and size.  "et" marks host-backed (etfs) nodes:
 * for VREG it initializes the host file descriptors to "unopened".
 * Returns the node; never fails (KM_SLEEP).
 */
static struct rumpfs_node *
makeprivate(enum vtype vt, mode_t mode, dev_t rdev, off_t size, bool et)
{
	struct rumpfs_node *rn;
	struct vattr *va;
	struct timespec ts;

	KASSERT((mode & ~ALLPERMS) == 0);
	rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);

	switch (vt) {
	case VDIR:
		LIST_INIT(&rn->rn_dir);
		break;
	case VREG:
		/* host-backed regular files lazily open host fds */
		if (et) {
			rn->rn_readfd = -1;
			rn->rn_writefd = -1;
		}
		break;
	default:
		break;
	}

	nanotime(&ts);

	va = &rn->rn_va;
	va->va_type = vt;
	va->va_mode = mode;
	if (vt == VDIR)
		va->va_nlink = 2;	/* "." plus the parent's entry */
	else
		va->va_nlink = 1;
	va->va_uid = 0;
	va->va_gid = 0;
	/*
	 * NOTE(review): lastino is declared int but incremented via
	 * atomic_inc_uint_nv() — relies on int/unsigned having the
	 * same representation; confirm against sys/atomic.h usage.
	 */
	va->va_fsid =
	    va->va_fileid = atomic_inc_uint_nv(&lastino);
	va->va_size = size;
	va->va_blocksize = 512;
	va->va_atime = ts;
	va->va_mtime = ts;
	va->va_ctime = ts;
	va->va_birthtime = ts;
	va->va_gen = 0;
	va->va_flags = 0;
	va->va_rdev = rdev;
	va->va_bytes = 512;
	va->va_filerev = 0;
	va->va_vaflags = 0;

	return rn;
}
560
/*
 * Release a node allocated by makeprivate().  Frees only the node
 * itself; attached storage (rn_hostpath, rn_data, rn_linktarg) must
 * be released by the caller beforehand.
 */
static void
freeprivate(struct rumpfs_node *rn)
{

	kmem_free(rn, sizeof(*rn));
}
567
/*
 * Insert a directory entry for node "rn" named by "cnp" into
 * directory node "rnd".  If the name is currently a whiteout, the
 * whiteout entry is removed first.
 */
static void
makedir(struct rumpfs_node *rnd,
	struct componentname *cnp, struct rumpfs_node *rn)
{
	struct rumpfs_dent *rdent;

	rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
	rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
	rdent->rd_node = rn;
	strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
	rdent->rd_namelen = strlen(rdent->rd_name);

	/* replacing a whiteout: drop the old sentinel entry */
	if ((cnp->cn_flags & ISWHITEOUT) != 0) {
		KASSERT((cnp->cn_flags & DOWHITEOUT) == 0);
		freedir(rnd, cnp);
	}
	LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
}
586
/*
 * Remove the entry named by "cnp" from directory node "rnd".  With
 * DOWHITEOUT set, the entry is converted into a whiteout instead of
 * being freed.  Panics if the entry does not exist — callers must
 * have looked it up first.
 */
static void
freedir(struct rumpfs_node *rnd, struct componentname *cnp)
{
	struct rumpfs_dent *rd = NULL;

	LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
		if (rd->rd_namelen == cnp->cn_namelen &&
		    strncmp(rd->rd_name, cnp->cn_nameptr,
		      cnp->cn_namelen) == 0)
			break;
	}
	if (rd == NULL)
		panic("could not find directory entry: %s", cnp->cn_nameptr);

	if (cnp->cn_flags & DOWHITEOUT) {
		rd->rd_node = RUMPFS_WHITEOUT;
	} else {
		LIST_REMOVE(rd, rd_entries);
		kmem_free(rd->rd_name, rd->rd_namelen+1);
		kmem_free(rd, sizeof(*rd));
	}
}
609
610 #define RUMPFS_ACCESS 1
611 #define RUMPFS_MODIFY 2
612 #define RUMPFS_CHANGE 4
613
614 static int
615 rumpfs_update(int flags, struct vnode *vp, const struct timespec *acc,
616 const struct timespec *mod, const struct timespec *chg)
617 {
618 struct rumpfs_node *rn = vp->v_data;
619
620 if (flags == 0)
621 return 0;
622
623 if (vp->v_mount->mnt_flag & MNT_RDONLY)
624 return EROFS;
625
626 if (flags & RUMPFS_ACCESS)
627 rn->rn_va.va_atime = *acc;
628 if (flags & RUMPFS_MODIFY)
629 rn->rn_va.va_mtime = *mod;
630 if (flags & RUMPFS_CHANGE)
631 rn->rn_va.va_ctime = *chg;
632
633 return 0;
634 }
635
/*
 * Simple lookup for rump file systems.
 *
 * Resolution order: "." short-circuit, then etfs keys (root dir
 * only), then host-backed (etfs) directories, then the plain in-core
 * directory list.  Rename is not supported.
 *
 * uhm, this is twisted.  C F C C, hope of C C F C looming
 */
static int
rump_vop_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	}; */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct rumpfs_node *rnd = dvp->v_data, *rn;
	struct rumpfs_dent *rd = NULL;
	struct etfs *et;
	bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
	int rv = 0;
	const char *cp;

	*vpp = NULL;

	/* caller needs search permission on the directory */
	rv = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
	if (rv)
		return rv;

	/* no delete/rename of the last component on a read-only mount */
	if ((cnp->cn_flags & ISLASTCN)
	    && (dvp->v_mount->mnt_flag & MNT_RDONLY)
	    && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return EROFS;

	/* check for dot, return directly if the case */
	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		vref(dvp);
		*vpp = dvp;
		return 0;
	}

	/* we don't do rename */
	if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
		return EOPNOTSUPP;

	/* check for etfs: keys are matched only against the root dir */
	if (dvp == rootvnode &&
	    (cnp->cn_nameiop == LOOKUP || cnp->cn_nameiop == CREATE)) {
		bool found;
		mutex_enter(&etfs_lock);
		found = etfs_find(cnp->cn_nameptr, &et, false);
		mutex_exit(&etfs_lock);

		if (found) {
			rn = et->et_rn;
			/* a key may span several path components */
			cnp->cn_consume += et->et_keylen - cnp->cn_namelen;
			/*
			 * consume trailing slashes if any and clear
			 * REQUIREDIR if we consumed the full path.
			 */
			cp = &cnp->cn_nameptr[cnp->cn_namelen];
			cp += cnp->cn_consume;
			KASSERT(*cp == '\0' || *cp == '/');
			if (*cp == '\0' && rn->rn_va.va_type != VDIR)
				cnp->cn_flags &= ~REQUIREDIR;
			while (*cp++ == '/')
				cnp->cn_consume++;
			goto getvnode;
		}
	}

	if (rnd->rn_flags & RUMPNODE_DIR_ET) {
		/*
		 * Host-backed directory: phone the host to resolve the
		 * component and build a fresh node for it.
		 */
		uint64_t fsize;
		char *newpath;
		size_t newpathlen;
		int hft, error;

		if (dotdot)
			return EOPNOTSUPP;

		newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
		newpath = malloc(newpathlen, M_TEMP, M_WAITOK);

		strlcpy(newpath, rnd->rn_hostpath, newpathlen);
		strlcat(newpath, "/", newpathlen);
		strlcat(newpath, cnp->cn_nameptr, newpathlen);

		if ((error = rumpuser_getfileinfo(newpath, &fsize, &hft)) != 0){
			free(newpath, M_TEMP);
			return error;
		}

		/* allow only dirs and regular files */
		if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
			free(newpath, M_TEMP);
			return ENOENT;
		}

		rn = makeprivate(hft_to_vtype(hft), RUMPFS_DEFAULTMODE,
		    NODEV, fsize, true);
		rn->rn_flags |= RUMPNODE_CANRECLAIM;
		if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
			rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
			rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
		}
		rn->rn_hostpath = newpath;

		goto getvnode;
	} else {
		if (dotdot) {
			/* no parent (root): falls through to ENOENT below */
			if ((rn = rnd->rn_parent) != NULL)
				goto getvnode;
		} else {
			/* plain in-core directory scan */
			LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
				if (rd->rd_namelen == cnp->cn_namelen &&
				    strncmp(rd->rd_name, cnp->cn_nameptr,
				      cnp->cn_namelen) == 0)
					break;
			}
		}
	}

	if (!rd && ((cnp->cn_flags & ISLASTCN) == 0||cnp->cn_nameiop != CREATE))
		return ENOENT;

	/* name not found, but the caller intends to create it */
	if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
		if (dvp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
		if (rv)
			return rv;
		return EJUSTRETURN;
	}

	/* deletion requires write permission on the directory */
	if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == DELETE) {
		rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
		if (rv)
			return rv;
	}

	if (RDENT_ISWHITEOUT(rd)) {
		cnp->cn_flags |= ISWHITEOUT;
		if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE)
			return EJUSTRETURN;
		return ENOENT;
	}

	rn = rd->rd_node;

 getvnode:
	KASSERT(rn);
	rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
	if (rv) {
		/* nodes made fresh for host-backed lookups are ours to free */
		if (rnd->rn_flags & RUMPNODE_DIR_ET)
			freeprivate(rn);
		return rv;
	}

	return 0;
}
796
797 static int
798 rump_check_possible(struct vnode *vp, struct rumpfs_node *rnode,
799 mode_t mode)
800 {
801
802 if ((mode & VWRITE) == 0)
803 return 0;
804
805 switch (vp->v_type) {
806 case VDIR:
807 case VLNK:
808 case VREG:
809 break;
810 default:
811 /* special file is always writable. */
812 return 0;
813 }
814
815 return vp->v_mount->mnt_flag & MNT_RDONLY ? EROFS : 0;
816 }
817
/*
 * Ask kauth whether "cred" is permitted the requested access on the
 * vnode, using the node's cached uid/gid/mode for the base decision
 * via genfs_can_access().
 */
static int
rump_check_permitted(struct vnode *vp, struct rumpfs_node *rnode,
	accmode_t accmode, kauth_cred_t cred)
{
	struct vattr *attr = &rnode->rn_va;

	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(accmode,
	    vp->v_type, attr->va_mode), vp, NULL, genfs_can_access(vp, cred,
	    attr->va_uid, attr->va_gid, attr->va_mode, NULL, accmode));
}
828
/*
 * VOP_ACCESS: first check whether the access is possible at all
 * (read-only mount), then whether the credentials permit it.
 */
int
rump_vop_access(void *v)
{
	struct vop_access_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		accmode_t a_accmode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	int error;

	error = rump_check_possible(vp, rn, ap->a_accmode);
	if (error)
		return error;

	error = rump_check_permitted(vp, rn, ap->a_accmode, ap->a_cred);

	return error;
}
850
851 static int
852 rump_vop_getattr(void *v)
853 {
854 struct vop_getattr_args /* {
855 struct vnode *a_vp;
856 struct vattr *a_vap;
857 kauth_cred_t a_cred;
858 } */ *ap = v;
859 struct vnode *vp = ap->a_vp;
860 struct rumpfs_node *rn = vp->v_data;
861 struct vattr *vap = ap->a_vap;
862
863 memcpy(vap, &rn->rn_va, sizeof(struct vattr));
864 vap->va_size = vp->v_size;
865 return 0;
866 }
867
/*
 * VOP_SETATTR: authorize and apply attribute changes — timestamps,
 * flags, ownership, mode and (for in-memory regular files) size.
 * Each class of change is kauth-authorized before being applied.
 */
static int
rump_vop_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct rumpfs_node *rn = vp->v_data;
	struct vattr *attr = &rn->rn_va;
	struct timespec now;
	kauth_cred_t cred = ap->a_cred;
	int error;

#define CHANGED(a, t) (vap->a != (t)VNOVAL)
#define SETIFVAL(a,t) if (CHANGED(a, t)) rn->rn_va.a = vap->a
	/* authorize any timestamp change before touching anything */
	if (CHANGED(va_atime.tv_sec, time_t) ||
	    CHANGED(va_ctime.tv_sec, time_t) ||
	    CHANGED(va_mtime.tv_sec, time_t) ||
	    CHANGED(va_birthtime.tv_sec, time_t) ||
	    CHANGED(va_atime.tv_nsec, long) ||
	    CHANGED(va_ctime.tv_nsec, long) ||
	    CHANGED(va_mtime.tv_nsec, long) ||
	    CHANGED(va_birthtime.tv_nsec, long)) {
		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp,
		    NULL, genfs_can_chtimes(vp, cred, attr->va_uid,
		    vap->va_vaflags));
		if (error)
			return error;
	}

	/* work out which timestamps to update, then apply them */
	int flags = 0;
	getnanotime(&now);
	if (vap->va_atime.tv_sec != VNOVAL)
		if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
			flags |= RUMPFS_ACCESS;
	if (vap->va_mtime.tv_sec != VNOVAL) {
		flags |= RUMPFS_CHANGE | RUMPFS_MODIFY;
		if (vp->v_mount->mnt_flag & MNT_RELATIME)
			flags |= RUMPFS_ACCESS;
	} else if (vap->va_size == 0) {
		flags |= RUMPFS_MODIFY;
		vap->va_mtime = now;
	}
	SETIFVAL(va_birthtime.tv_sec, time_t);
	SETIFVAL(va_birthtime.tv_nsec, long);
	flags |= RUMPFS_CHANGE;
	error = rumpfs_update(flags, vp, &vap->va_atime, &vap->va_mtime, &now);
	if (error)
		return error;

	if (CHANGED(va_flags, u_long)) {
		/* XXX Can we handle system flags here...? */
		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_FLAGS, vp,
		    NULL, genfs_can_chflags(vp, cred, attr->va_uid, false));
		if (error)
			return error;
	}

	SETIFVAL(va_flags, u_long);
#undef SETIFVAL
#undef CHANGED

	/*
	 * NOTE(review): the second comparison casts va_gid to (uid_t);
	 * looks like a typo for (gid_t) — harmless while the VNOVAL
	 * representation coincides, but worth confirming.
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (uid_t)VNOVAL) {
		uid_t uid =
		    (vap->va_uid != (uid_t)VNOVAL) ? vap->va_uid : attr->va_uid;
		gid_t gid =
		    (vap->va_gid != (gid_t)VNOVAL) ? vap->va_gid : attr->va_gid;
		error = kauth_authorize_vnode(cred,
		    KAUTH_VNODE_CHANGE_OWNERSHIP, vp, NULL,
		    genfs_can_chown(vp, cred, attr->va_uid, attr->va_gid, uid,
		    gid));
		if (error)
			return error;
		attr->va_uid = uid;
		attr->va_gid = gid;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t mode = vap->va_mode;
		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY,
		    vp, NULL, genfs_can_chmod(vp, cred, attr->va_uid,
		    attr->va_gid, mode));
		if (error)
			return error;
		attr->va_mode = mode;
	}

	/*
	 * Resize in-memory regular files by reallocating the backing
	 * storage; host-backed (ET_PHONE_HOST) files are left alone.
	 */
	if (vp->v_type == VREG &&
	    vap->va_size != VSIZENOTSET &&
	    vap->va_size != rn->rn_dlen &&
	    (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0) {
		void *newdata;
		size_t copylen, newlen;

		newlen = vap->va_size;
		newdata = rump_hypermalloc(newlen, 0, false, "rumpfs");
		if (newdata == NULL)
			return ENOSPC;

		copylen = MIN(rn->rn_dlen, newlen);
		if (copylen > 0)
			memcpy(newdata, rn->rn_data, copylen);
		memset((char *)newdata + copylen, 0, newlen - copylen);

		/* external storage is not ours to free */
		if ((rn->rn_flags & RUMPNODE_EXTSTORAGE) == 0) {
			rump_hyperfree(rn->rn_data, rn->rn_dlen);
		} else {
			rn->rn_flags &= ~RUMPNODE_EXTSTORAGE;
		}

		rn->rn_data = newdata;
		rn->rn_dlen = newlen;
		uvm_vnp_setsize(vp, newlen);
	}
	return 0;
}
987
/*
 * VOP_MKDIR: create a directory node, obtain a vnode for it, and
 * link it into the parent.
 */
static int
rump_vop_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	}; */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct vattr *va = ap->a_vap;
	struct rumpfs_node *rnd = dvp->v_data, *rn;
	int rv = 0;

	rn = makeprivate(VDIR, va->va_mode & ALLPERMS, NODEV, DEV_BSIZE, false);
	/* creating over a whiteout makes the new dir opaque */
	if ((cnp->cn_flags & ISWHITEOUT) != 0)
		rn->rn_va.va_flags |= UF_OPAQUE;
	rn->rn_parent = rnd;
	rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
	if (rv) {
		freeprivate(rn);
		return rv;
	}

	makedir(rnd, cnp, rn);

	return rv;
}
1018
/*
 * VOP_RMDIR: remove a directory.  Only directories whose remaining
 * entries are all whiteouts may be removed; the whiteouts are purged
 * along with the directory.
 */
static int
rump_vop_rmdir(void *v)
{
	struct vop_rmdir_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	}; */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct componentname *cnp = ap->a_cnp;
	struct rumpfs_node *rnd = dvp->v_data;
	struct rumpfs_node *rn = vp->v_data;
	struct rumpfs_dent *rd;
	int rv = 0;

	/* any real (non-whiteout) entry means the dir is not empty */
	LIST_FOREACH(rd, &rn->rn_dir, rd_entries) {
		if (rd->rd_node != RUMPFS_WHITEOUT) {
			rv = ENOTEMPTY;
			goto out;
		}
	}
	/* purge the leftover whiteout entries */
	while ((rd = LIST_FIRST(&rn->rn_dir)) != NULL) {
		KASSERT(rd->rd_node == RUMPFS_WHITEOUT);
		LIST_REMOVE(rd, rd_entries);
		kmem_free(rd->rd_name, rd->rd_namelen+1);
		kmem_free(rd, sizeof(*rd));
	}

	/* unlink from the parent and mark reclaimable */
	freedir(rnd, cnp);
	rn->rn_flags |= RUMPNODE_CANRECLAIM;
	rn->rn_parent = NULL;
	rn->rn_va.va_nlink = 0;

 out:
	vput(vp);
	return rv;
}
1057
/*
 * VOP_REMOVE: unlink a file from its directory and mark the node
 * reclaimable.  Host-backed (etfs) nodes cannot be removed this way.
 */
static int
rump_vop_remove(void *v)
{
	struct vop_remove_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	}; */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct componentname *cnp = ap->a_cnp;
	struct rumpfs_node *rnd = dvp->v_data;
	struct rumpfs_node *rn = vp->v_data;
	int rv = 0;

	if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
		return EOPNOTSUPP;

	freedir(rnd, cnp);
	rn->rn_flags |= RUMPNODE_CANRECLAIM;
	rn->rn_va.va_nlink = 0;

	vput(vp);
	return rv;
}
1083
1084 static int
1085 rump_vop_mknod(void *v)
1086 {
1087 struct vop_mknod_v3_args /* {
1088 struct vnode *a_dvp;
1089 struct vnode **a_vpp;
1090 struct componentname *a_cnp;
1091 struct vattr *a_vap;
1092 }; */ *ap = v;
1093 struct vnode *dvp = ap->a_dvp;
1094 struct vnode **vpp = ap->a_vpp;
1095 struct componentname *cnp = ap->a_cnp;
1096 struct vattr *va = ap->a_vap;
1097 struct rumpfs_node *rnd = dvp->v_data, *rn;
1098 int rv;
1099
1100 rn = makeprivate(va->va_type, va->va_mode & ALLPERMS, va->va_rdev,
1101 DEV_BSIZE, false);
1102 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1103 rn->rn_va.va_flags |= UF_OPAQUE;
1104 rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
1105 if (rv) {
1106 freeprivate(rn);
1107 return rv;
1108 }
1109
1110 makedir(rnd, cnp, rn);
1111
1112 return rv;
1113 }
1114
1115 static int
1116 rump_vop_create(void *v)
1117 {
1118 struct vop_create_v3_args /* {
1119 struct vnode *a_dvp;
1120 struct vnode **a_vpp;
1121 struct componentname *a_cnp;
1122 struct vattr *a_vap;
1123 }; */ *ap = v;
1124 struct vnode *dvp = ap->a_dvp;
1125 struct vnode **vpp = ap->a_vpp;
1126 struct componentname *cnp = ap->a_cnp;
1127 struct vattr *va = ap->a_vap;
1128 struct rumpfs_node *rnd = dvp->v_data, *rn;
1129 off_t newsize;
1130 int rv;
1131
1132 newsize = va->va_type == VSOCK ? DEV_BSIZE : 0;
1133 rn = makeprivate(va->va_type, va->va_mode & ALLPERMS, NODEV,
1134 newsize, false);
1135 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1136 rn->rn_va.va_flags |= UF_OPAQUE;
1137 rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
1138 if (rv) {
1139 freeprivate(rn);
1140 return rv;
1141 }
1142
1143 makedir(rnd, cnp, rn);
1144
1145 return rv;
1146 }
1147
1148 static int
1149 rump_vop_symlink(void *v)
1150 {
1151 struct vop_symlink_v3_args /* {
1152 struct vnode *a_dvp;
1153 struct vnode **a_vpp;
1154 struct componentname *a_cnp;
1155 struct vattr *a_vap;
1156 char *a_target;
1157 }; */ *ap = v;
1158 struct vnode *dvp = ap->a_dvp;
1159 struct vnode **vpp = ap->a_vpp;
1160 struct componentname *cnp = ap->a_cnp;
1161 struct vattr *va = ap->a_vap;
1162 struct rumpfs_node *rnd = dvp->v_data, *rn;
1163 const char *target = ap->a_target;
1164 size_t linklen;
1165 int rv;
1166
1167 linklen = strlen(target);
1168 KASSERT(linklen < MAXPATHLEN);
1169 rn = makeprivate(VLNK, va->va_mode & ALLPERMS, NODEV, linklen, false);
1170 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1171 rn->rn_va.va_flags |= UF_OPAQUE;
1172 rv = vcache_get(dvp->v_mount, &rn, sizeof(rn), vpp);
1173 if (rv) {
1174 freeprivate(rn);
1175 return rv;
1176 }
1177
1178 makedir(rnd, cnp, rn);
1179
1180 KASSERT(linklen < MAXPATHLEN);
1181 rn->rn_linktarg = PNBUF_GET();
1182 rn->rn_linklen = linklen;
1183 strcpy(rn->rn_linktarg, target);
1184
1185 return rv;
1186 }
1187
/*
 * VOP_READLINK: copy the stored symlink target into the caller's uio.
 */
static int
rump_vop_readlink(void *v)
{
	struct vop_readlink_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		kauth_cred_t a_cred;
	}; */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	struct uio *uio = ap->a_uio;

	return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
}
1202
/*
 * VOP_WHITEOUT: manage whiteout entries in a directory.  CREATE
 * inserts a whiteout sentinel, DELETE removes the real entry (with
 * DOWHITEOUT cleared so freedir() actually frees it), LOOKUP is a
 * no-op.
 */
static int
rump_vop_whiteout(void *v)
{
	struct vop_whiteout_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
		int a_flags;
	} */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct rumpfs_node *rnd = dvp->v_data;
	struct componentname *cnp = ap->a_cnp;
	int flags = ap->a_flags;

	switch (flags) {
	case LOOKUP:
		break;
	case CREATE:
		makedir(rnd, cnp, RUMPFS_WHITEOUT);
		break;
	case DELETE:
		cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails ? */
		freedir(rnd, cnp);
		break;
	default:
		panic("unknown whiteout op %d", flags);
	}

	return 0;
}
1232
/*
 * Open a vnode.  Only regular files backed by a host file
 * (RUMPNODE_ET_PHONE_HOST) need any work here: the host file
 * descriptors used for reading and/or writing are opened lazily.
 * Everything else is served from memory and succeeds trivially.
 */
static int
rump_vop_open(void *v)
{
	struct vop_open_args /* {
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	int mode = ap->a_mode;
	int error = EINVAL;

	/* in-memory nodes need no open-time work */
	if (vp->v_type != VREG || (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0)
		return 0;

	if (mode & FREAD) {
		/*
		 * NOTE(review): if the read fd is already open, this
		 * returns immediately and skips the FWRITE branch below
		 * even when FREAD|FWRITE was requested -- confirm this
		 * is intentional.
		 */
		if (rn->rn_readfd != -1)
			return 0;
		error = rumpuser_open(rn->rn_hostpath,
		    RUMPUSER_OPEN_RDONLY, &rn->rn_readfd);
	}

	if (mode & FWRITE) {
		if (rn->rn_writefd != -1)
			return 0;
		error = rumpuser_open(rn->rn_hostpath,
		    RUMPUSER_OPEN_WRONLY, &rn->rn_writefd);
	}

	return error;
}
1265
/*
 * Simple readdir: linearly walks the directory list on every call,
 * using uio_offset as the ordinal index of the next entry to return.
 * Even omits dotstuff and periods; cookies are not supported.
 */
static int
rump_vop_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		kauth_cred_t a_cred;
		int *a_eofflag;
		off_t **a_cookies;
		int *a_ncookies;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct rumpfs_node *rnd = vp->v_data;
	struct rumpfs_dent *rdent;
	struct dirent *dentp = NULL;
	unsigned i;
	int rv = 0;

	/* seek to current entry */
	for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
	    (i < uio->uio_offset) && rdent;
	    i++, rdent = LIST_NEXT(rdent, rd_entries))
		continue;
	if (!rdent)
		goto out;

	/* copy entries */
	dentp = kmem_alloc(sizeof(*dentp), KM_SLEEP);
	for (; rdent && uio->uio_resid > 0;
	    rdent = LIST_NEXT(rdent, rd_entries), i++) {
		strlcpy(dentp->d_name, rdent->rd_name, sizeof(dentp->d_name));
		dentp->d_namlen = strlen(dentp->d_name);
		dentp->d_reclen = _DIRENT_RECLEN(dentp, dentp->d_namlen);

		/* whiteouts are reported with a special inode and type */
		if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
			dentp->d_fileno = INO_WHITEOUT;
			dentp->d_type = DT_WHT;
		} else {
			dentp->d_fileno = rdent->rd_node->rn_va.va_fileid;
			dentp->d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
		}

		/* entry does not fit: back the index off and stop */
		if (uio->uio_resid < dentp->d_reclen) {
			i--;
			break;
		}

		rv = uiomove(dentp, dentp->d_reclen, uio);
		if (rv) {
			i--;
			break;
		}
	}
	kmem_free(dentp, sizeof(*dentp));
	dentp = NULL;

 out:
	KASSERT(dentp == NULL);
	/* cookies are not supported */
	if (ap->a_cookies) {
		*ap->a_ncookies = 0;
		*ap->a_cookies = NULL;
	}
	if (rdent)
		*ap->a_eofflag = 0;
	else
		*ap->a_eofflag = 1;
	/* remember the position for the next call */
	uio->uio_offset = i;

	return rv;
}
1338
1339 static int
1340 etread(struct rumpfs_node *rn, struct uio *uio)
1341 {
1342 struct rumpuser_iovec iov;
1343 uint8_t *buf;
1344 size_t bufsize, n;
1345 int error = 0;
1346
1347 bufsize = uio->uio_resid;
1348 if (bufsize == 0)
1349 return 0;
1350 buf = kmem_alloc(bufsize, KM_SLEEP);
1351
1352 iov.iov_base = buf;
1353 iov.iov_len = bufsize;
1354 if ((error = rumpuser_iovread(rn->rn_readfd, &iov, 1,
1355 uio->uio_offset + rn->rn_offset, &n)) == 0) {
1356 KASSERT(n <= bufsize);
1357 error = uiomove(buf, n, uio);
1358 }
1359
1360 kmem_free(buf, bufsize);
1361 return error;
1362 }
1363
1364 static int
1365 rump_vop_read(void *v)
1366 {
1367 struct vop_read_args /* {
1368 struct vnode *a_vp;
1369 struct uio *a_uio;
1370 int ioflags a_ioflag;
1371 kauth_cred_t a_cred;
1372 }; */ *ap = v;
1373 struct vnode *vp = ap->a_vp;
1374 struct rumpfs_node *rn = vp->v_data;
1375 struct uio *uio = ap->a_uio;
1376 const int advice = IO_ADV_DECODE(ap->a_ioflag);
1377 off_t chunk;
1378 int error = 0;
1379 struct timespec ts;
1380
1381 if (vp->v_type == VDIR)
1382 return EISDIR;
1383
1384 /* et op? */
1385 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1386 return etread(rn, uio);
1387
1388 getnanotime(&ts);
1389 (void)rumpfs_update(RUMPFS_ACCESS, vp, &ts, &ts, &ts);
1390
1391 /* otherwise, it's off to ubc with us */
1392 while (uio->uio_resid > 0) {
1393 chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1394 if (chunk == 0)
1395 break;
1396 error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1397 UBC_READ | UBC_PARTIALOK | UBC_VNODE_FLAGS(vp));
1398 if (error)
1399 break;
1400 }
1401
1402 return error;
1403 }
1404
1405 static int
1406 etwrite(struct rumpfs_node *rn, struct uio *uio)
1407 {
1408 struct rumpuser_iovec iov;
1409 uint8_t *buf;
1410 size_t bufsize, n;
1411 int error = 0;
1412
1413 bufsize = uio->uio_resid;
1414 if (bufsize == 0)
1415 return 0;
1416 buf = kmem_alloc(bufsize, KM_SLEEP);
1417 error = uiomove(buf, bufsize, uio);
1418 if (error)
1419 goto out;
1420
1421 KASSERT(uio->uio_resid == 0);
1422 iov.iov_base = buf;
1423 iov.iov_len = bufsize;
1424 if ((error = rumpuser_iovwrite(rn->rn_writefd, &iov, 1,
1425 (uio->uio_offset-bufsize) + rn->rn_offset, &n)) == 0) {
1426 KASSERT(n <= bufsize);
1427 uio->uio_resid = bufsize - n;
1428 }
1429
1430 out:
1431 kmem_free(buf, bufsize);
1432 return error;
1433 }
1434
1435 static int
1436 rump_vop_write(void *v)
1437 {
1438 struct vop_write_args /* {
1439 struct vnode *a_vp;
1440 struct uio *a_uio;
1441 int ioflags a_ioflag;
1442 kauth_cred_t a_cred;
1443 }; */ *ap = v;
1444 struct vnode *vp = ap->a_vp;
1445 struct rumpfs_node *rn = vp->v_data;
1446 struct uio *uio = ap->a_uio;
1447 const int advice = IO_ADV_DECODE(ap->a_ioflag);
1448 void *olddata;
1449 size_t oldlen, newlen;
1450 off_t chunk;
1451 int error = 0;
1452 bool allocd = false;
1453 struct timespec ts;
1454
1455 getnanotime(&ts);
1456 (void)rumpfs_update(RUMPFS_MODIFY, vp, &ts, &ts, &ts);
1457
1458 if (ap->a_ioflag & IO_APPEND)
1459 uio->uio_offset = vp->v_size;
1460
1461 /* consult et? */
1462 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1463 return etwrite(rn, uio);
1464
1465 /*
1466 * Otherwise, it's a case of ubcmove.
1467 */
1468
1469 /*
1470 * First, make sure we have enough storage.
1471 *
1472 * No, you don't need to tell me it's not very efficient.
1473 * No, it doesn't really support sparse files, just fakes it.
1474 */
1475 newlen = uio->uio_offset + uio->uio_resid;
1476 oldlen = 0; /* XXXgcc */
1477 olddata = NULL;
1478 if (rn->rn_dlen < newlen) {
1479 oldlen = rn->rn_dlen;
1480 olddata = rn->rn_data;
1481
1482 rn->rn_data = rump_hypermalloc(newlen, 0, false, "rumpfs");
1483 if (rn->rn_data == NULL)
1484 return ENOSPC;
1485 rn->rn_dlen = newlen;
1486 memset(rn->rn_data, 0, newlen);
1487 if (oldlen > 0)
1488 memcpy(rn->rn_data, olddata, oldlen);
1489 allocd = true;
1490 uvm_vnp_setsize(vp, newlen);
1491 }
1492
1493 /* ok, we have enough stooorage. write */
1494 while (uio->uio_resid > 0) {
1495 chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1496 if (chunk == 0)
1497 break;
1498 error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1499 UBC_WRITE | UBC_PARTIALOK | UBC_VNODE_FLAGS(vp));
1500 if (error)
1501 break;
1502 }
1503
1504 if (allocd) {
1505 if (error) {
1506 rump_hyperfree(rn->rn_data, newlen);
1507 rn->rn_data = olddata;
1508 rn->rn_dlen = oldlen;
1509 uvm_vnp_setsize(vp, oldlen);
1510 } else {
1511 if ((rn->rn_flags & RUMPNODE_EXTSTORAGE) == 0) {
1512 rump_hyperfree(olddata, oldlen);
1513 } else {
1514 rn->rn_flags &= ~RUMPNODE_EXTSTORAGE;
1515 }
1516 }
1517 }
1518
1519 return error;
1520 }
1521
1522 static int
1523 rump_vop_bmap(void *v)
1524 {
1525 struct vop_bmap_args /* {
1526 struct vnode *a_vp;
1527 daddr_t a_bn;
1528 struct vnode **a_vpp;
1529 daddr_t *a_bnp;
1530 int *a_runp;
1531 } */ *ap = v;
1532
1533 /* 1:1 mapping */
1534 if (ap->a_vpp)
1535 *ap->a_vpp = ap->a_vp;
1536 if (ap->a_bnp)
1537 *ap->a_bnp = ap->a_bn;
1538 if (ap->a_runp)
1539 *ap->a_runp = 16;
1540
1541 return 0;
1542 }
1543
1544 static int
1545 rump_vop_strategy(void *v)
1546 {
1547 struct vop_strategy_args /* {
1548 struct vnode *a_vp;
1549 struct buf *a_bp;
1550 } */ *ap = v;
1551 struct vnode *vp = ap->a_vp;
1552 struct rumpfs_node *rn = vp->v_data;
1553 struct buf *bp = ap->a_bp;
1554 off_t copylen, copyoff;
1555 int error;
1556
1557 if (vp->v_type != VREG || rn->rn_flags & RUMPNODE_ET_PHONE_HOST) {
1558 error = EINVAL;
1559 goto out;
1560 }
1561
1562 copyoff = bp->b_blkno << DEV_BSHIFT;
1563 copylen = MIN(rn->rn_dlen - copyoff, bp->b_bcount);
1564 if (BUF_ISWRITE(bp)) {
1565 memcpy((uint8_t *)rn->rn_data + copyoff, bp->b_data, copylen);
1566 } else {
1567 memset((uint8_t*)bp->b_data + copylen, 0, bp->b_bcount-copylen);
1568 memcpy(bp->b_data, (uint8_t *)rn->rn_data + copyoff, copylen);
1569 }
1570 bp->b_resid = 0;
1571 error = 0;
1572
1573 out:
1574 bp->b_error = error;
1575 biodone(bp);
1576 return 0;
1577 }
1578
1579 static int
1580 rump_vop_pathconf(void *v)
1581 {
1582 struct vop_pathconf_args /* {
1583 struct vnode *a_vp;
1584 int a_name;
1585 register_t *a_retval;
1586 }; */ *ap = v;
1587 int name = ap->a_name;
1588 register_t *retval = ap->a_retval;
1589
1590 switch (name) {
1591 case _PC_LINK_MAX:
1592 *retval = LINK_MAX;
1593 return 0;
1594 case _PC_NAME_MAX:
1595 *retval = RUMPFS_MAXNAMLEN;
1596 return 0;
1597 case _PC_PATH_MAX:
1598 *retval = PATH_MAX;
1599 return 0;
1600 case _PC_PIPE_BUF:
1601 *retval = PIPE_BUF;
1602 return 0;
1603 case _PC_CHOWN_RESTRICTED:
1604 *retval = 1;
1605 return 0;
1606 case _PC_NO_TRUNC:
1607 *retval = 1;
1608 return 0;
1609 case _PC_SYNC_IO:
1610 *retval = 1;
1611 return 0;
1612 case _PC_FILESIZEBITS:
1613 *retval = 43; /* this one goes to 11 */
1614 return 0;
1615 case _PC_SYMLINK_MAX:
1616 *retval = MAXPATHLEN;
1617 return 0;
1618 case _PC_2_SYMLINKS:
1619 *retval = 1;
1620 return 0;
1621 default:
1622 return EINVAL;
1623 }
1624 }
1625
/*
 * Generic "always succeeds" vnode operation, used for ops that
 * require no work in rumpfs.
 */
static int
rump_vop_success(void *v)
{

	return 0;
}
1632
1633 static int
1634 rump_vop_inactive(void *v)
1635 {
1636 struct vop_inactive_v2_args /* {
1637 struct vnode *a_vp;
1638 bool *a_recycle;
1639 } */ *ap = v;
1640 struct vnode *vp = ap->a_vp;
1641 struct rumpfs_node *rn = vp->v_data;
1642
1643 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST && vp->v_type == VREG) {
1644 if (rn->rn_readfd != -1) {
1645 rumpuser_close(rn->rn_readfd);
1646 rn->rn_readfd = -1;
1647 }
1648 if (rn->rn_writefd != -1) {
1649 rumpuser_close(rn->rn_writefd);
1650 rn->rn_writefd = -1;
1651 }
1652 }
1653 *ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;
1654
1655 return 0;
1656 }
1657
/*
 * Reclaim a vnode: disassociate it from its rumpfs node and, if the
 * node itself is reclaimable, release all node-owned storage.
 */
static int
rump_vop_reclaim(void *v)
{
	struct vop_reclaim_v2_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;

	VOP_UNLOCK(vp);

	/* break the node<->vnode association under reclock */
	mutex_enter(&reclock);
	rn->rn_vp = NULL;
	mutex_exit(&reclock);
	genfs_node_destroy(vp);
	vp->v_data = NULL;

	if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
		/*
		 * Free the file data only if rumpfs owns it: external
		 * storage (RUMPNODE_EXTSTORAGE) belongs to the client.
		 */
		if (vp->v_type == VREG
		    && (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0
		    && rn->rn_data) {
			if ((rn->rn_flags & RUMPNODE_EXTSTORAGE) == 0) {
				rump_hyperfree(rn->rn_data, rn->rn_dlen);
			} else {
				rn->rn_flags &= ~RUMPNODE_EXTSTORAGE;
			}
			rn->rn_data = NULL;
		}

		/* symlink target buffer came from PNBUF_GET() */
		if (vp->v_type == VLNK)
			PNBUF_PUT(rn->rn_linktarg);
		if (rn->rn_hostpath)
			free(rn->rn_hostpath, M_TEMP);
		freeprivate(rn);
	}

	return 0;
}
1696
1697 static int
1698 rump_vop_spec(void *v)
1699 {
1700 struct vop_generic_args *ap = v;
1701 int (**opvec)(void *);
1702
1703 switch (ap->a_desc->vdesc_offset) {
1704 case VOP_ACCESS_DESCOFFSET:
1705 case VOP_GETATTR_DESCOFFSET:
1706 case VOP_SETATTR_DESCOFFSET:
1707 case VOP_LOCK_DESCOFFSET:
1708 case VOP_UNLOCK_DESCOFFSET:
1709 case VOP_ISLOCKED_DESCOFFSET:
1710 case VOP_INACTIVE_DESCOFFSET:
1711 case VOP_RECLAIM_DESCOFFSET:
1712 opvec = rump_vnodeop_p;
1713 break;
1714 default:
1715 opvec = spec_vnodeop_p;
1716 break;
1717 }
1718
1719 return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
1720 }
1721
1722 static int
1723 rump_vop_advlock(void *v)
1724 {
1725 struct vop_advlock_args /* {
1726 const struct vnodeop_desc *a_desc;
1727 struct vnode *a_vp;
1728 void *a_id;
1729 int a_op;
1730 struct flock *a_fl;
1731 int a_flags;
1732 } */ *ap = v;
1733 struct vnode *vp = ap->a_vp;
1734 struct rumpfs_node *rn = vp->v_data;
1735
1736 return lf_advlock(ap, &rn->rn_lockf, vp->v_size);
1737 }
1738
/*
 * rumpfs-private fcntl operations.  Currently only
 * RUMPFS_FCNTL_EXTSTORAGE_ADD is supported: it replaces the file's
 * backing store with client-provided ("external") memory.
 */
static int
rump_vop_fcntl(void *v)
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		void *a_data;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct proc *p = curproc;
	struct vnode *vp = ap->a_vp;
	struct rumpfs_node *rn = vp->v_data;
	u_int cmd = ap->a_command;
	int fflag = ap->a_fflag;
	struct rumpfs_extstorage *rfse = ap->a_data;
	int error = 0;

	/* none of the current rumpfs fcntlops are defined for remotes */
	if (!RUMP_LOCALPROC_P(p))
		return EINVAL;

	switch (cmd) {
	case RUMPFS_FCNTL_EXTSTORAGE_ADD:
		break;
	default:
		return EINVAL;
	}

	/* replacing storage requires the file to be open for writing */
	if ((fflag & FWRITE) == 0)
		return EBADF;

	/* only plain in-memory regular files can take external storage */
	if (vp->v_type != VREG || (rn->rn_flags & RUMPNODE_ET_PHONE_HOST))
		return EINVAL;

	/* no extstorage flags are defined yet */
	if (rfse->rfse_flags != 0)
		return EINVAL;

	/*
	 * Ok, we are good to go.  Process.
	 */

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	KASSERT(cmd == RUMPFS_FCNTL_EXTSTORAGE_ADD);
	/* release the old rumpfs-owned backing store, if any */
	if (rn->rn_data && (rn->rn_flags & RUMPNODE_EXTSTORAGE) == 0) {
		rump_hyperfree(rn->rn_data, rn->rn_dlen);
	}

	rn->rn_data = rfse->rfse_data;
	rn->rn_dlen = rfse->rfse_dlen;
	uvm_vnp_setsize(vp, rn->rn_dlen);
	rn->rn_flags |= RUMPNODE_EXTSTORAGE;

	VOP_UNLOCK(vp);

	return error;
}
1797
1798 /*
1799 * Begin vfs-level stuff
1800 */
1801
VFS_PROTOS(rumpfs);
/* VFS operations vector: entry points the kernel uses for rumpfs mounts. */
struct vfsops rumpfs_vfsops = {
	.vfs_name =		MOUNT_RUMPFS,
	.vfs_min_mount_data = 	0,
	.vfs_mount =		rumpfs_mount,
	.vfs_start =		(void *)nullop,
	.vfs_unmount =		rumpfs_unmount,
	.vfs_root =		rumpfs_root,
	.vfs_quotactl =		(void *)eopnotsupp,
	.vfs_statvfs =		genfs_statvfs,
	.vfs_sync =		(void *)nullop,
	.vfs_vget =		rumpfs_vget,
	.vfs_loadvnode =	rumpfs_loadvnode,
	.vfs_fhtovp =		(void *)eopnotsupp,
	.vfs_vptofh =		(void *)eopnotsupp,
	.vfs_init =		rumpfs_init,
	.vfs_reinit =		NULL,
	.vfs_done =		rumpfs_done,
	.vfs_mountroot =	rumpfs_mountroot,
	.vfs_snapshot =		(void *)eopnotsupp,
	.vfs_extattrctl =	(void *)eopnotsupp,
	.vfs_suspendctl =	genfs_suspendctl,
	.vfs_renamelock_enter =	genfs_renamelock_enter,
	.vfs_renamelock_exit =	genfs_renamelock_exit,
	.vfs_opv_descs =	rump_opv_descs,
	/* vfs_refcount */
	/* vfs_list */
};
1830
1831 static int
1832 rumpfs_mountfs(struct mount *mp)
1833 {
1834 struct rumpfs_mount *rfsmp;
1835 struct rumpfs_node *rn;
1836 int error;
1837
1838 rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);
1839
1840 rn = makeprivate(VDIR, RUMPFS_DEFAULTMODE, NODEV, DEV_BSIZE, false);
1841 rn->rn_parent = rn;
1842 if ((error = vcache_get(mp, &rn, sizeof(rn), &rfsmp->rfsmp_rvp))
1843 != 0) {
1844 freeprivate(rn);
1845 kmem_free(rfsmp, sizeof(*rfsmp));
1846 return error;
1847 }
1848
1849 rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
1850
1851 mp->mnt_data = rfsmp;
1852 mp->mnt_stat.f_namemax = RUMPFS_MAXNAMLEN;
1853 mp->mnt_stat.f_iosize = 512;
1854 mp->mnt_flag |= MNT_LOCAL;
1855 mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO;
1856 mp->mnt_fs_bshift = DEV_BSHIFT;
1857 vfs_getnewfsid(mp);
1858
1859 return 0;
1860 }
1861
1862 int
1863 rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
1864 {
1865 int error, flags;
1866
1867 if (mp->mnt_flag & MNT_GETARGS) {
1868 return 0;
1869 }
1870 if (mp->mnt_flag & MNT_UPDATE) {
1871 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) {
1872 /* Changing from read/write to read-only. */
1873 flags = WRITECLOSE;
1874 if ((mp->mnt_flag & MNT_FORCE))
1875 flags |= FORCECLOSE;
1876 error = vflush(mp, NULL, flags);
1877 if (error)
1878 return error;
1879 }
1880 return 0;
1881 }
1882
1883 error = set_statvfs_info(mntpath, UIO_USERSPACE, "rumpfs", UIO_SYSSPACE,
1884 mp->mnt_op->vfs_name, mp, curlwp);
1885 if (error)
1886 return error;
1887
1888 return rumpfs_mountfs(mp);
1889 }
1890
1891 int
1892 rumpfs_unmount(struct mount *mp, int mntflags)
1893 {
1894 struct rumpfs_mount *rfsmp = mp->mnt_data;
1895 int flags = 0, error;
1896
1897 if (panicstr || mntflags & MNT_FORCE)
1898 flags |= FORCECLOSE;
1899
1900 if (vrefcnt(rfsmp->rfsmp_rvp) > 1 && (flags & FORCECLOSE) == 0)
1901 return EBUSY;
1902
1903 if ((error = vflush(mp, rfsmp->rfsmp_rvp, flags)) != 0)
1904 return error;
1905 vgone(rfsmp->rfsmp_rvp);
1906
1907 kmem_free(rfsmp, sizeof(*rfsmp));
1908
1909 return 0;
1910 }
1911
1912 int
1913 rumpfs_root(struct mount *mp, int lktype, struct vnode **vpp)
1914 {
1915 struct rumpfs_mount *rfsmp = mp->mnt_data;
1916
1917 vref(rfsmp->rfsmp_rvp);
1918 vn_lock(rfsmp->rfsmp_rvp, lktype | LK_RETRY);
1919 *vpp = rfsmp->rfsmp_rvp;
1920 return 0;
1921 }
1922
/*
 * Inode-number based vnode lookup is not supported by rumpfs.
 */
int
rumpfs_vget(struct mount *mp, ino_t ino, int lktype, struct vnode **vpp)
{

	return EOPNOTSUPP;
}
1929
/*
 * vcache loadvnode callback.  The cache key is the rumpfs_node
 * pointer itself; initialize the new vnode from the node's stashed
 * attributes.
 */
int
rumpfs_loadvnode(struct mount *mp, struct vnode *vp,
    const void *key, size_t key_len, const void **new_key)
{
	struct rumpfs_node *rn;
	struct vattr *va;

	KASSERT(!mutex_owned(&reclock));

	KASSERT(key_len == sizeof(rn));
	memcpy(&rn, key, key_len);

	va = &rn->rn_va;

	vp->v_tag = VT_RUMP;
	vp->v_type = va->va_type;
	switch (vp->v_type) {
	case VCHR:
	case VBLK:
		/* device nodes use the specfs dispatch vector */
		vp->v_op = rump_specop_p;
		spec_node_init(vp, va->va_rdev);
		break;
	default:
		vp->v_op = rump_vnodeop_p;
		break;
	}
	vp->v_size = vp->v_writesize = va->va_size;
	vp->v_data = rn;

	genfs_node_init(vp, &rumpfs_genfsops);
	/* establish the node<->vnode association under reclock */
	mutex_enter(&reclock);
	rn->rn_vp = vp;
	mutex_exit(&reclock);

	/* v_data holds the node pointer, so it doubles as the cache key */
	*new_key = &vp->v_data;

	return 0;
}
1968
1969 void
1970 rumpfs_init()
1971 {
1972 extern rump_etfs_register_withsize_fn rump__etfs_register;
1973 extern rump_etfs_remove_fn rump__etfs_remove;
1974 extern struct rump_boot_etfs *ebstart;
1975 struct rump_boot_etfs *eb;
1976
1977 CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);
1978
1979 mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
1980 mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
1981
1982 rump__etfs_register = etfsregister;
1983 rump__etfs_remove = etfsremove;
1984
1985 for (eb = ebstart; eb; eb = eb->_eb_next) {
1986 eb->eb_status = etfsregister(eb->eb_key, eb->eb_hostpath,
1987 eb->eb_type, eb->eb_begin, eb->eb_size);
1988 }
1989 }
1990
1991 void
1992 rumpfs_done()
1993 {
1994
1995 mutex_destroy(&reclock);
1996 mutex_destroy(&etfs_lock);
1997 }
1998
1999 int
2000 rumpfs_mountroot()
2001 {
2002 struct mount *mp;
2003 int error;
2004
2005 if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
2006 vrele(rootvp);
2007 return error;
2008 }
2009
2010 if ((error = rumpfs_mountfs(mp)) != 0)
2011 panic("mounting rootfs failed: %d", error);
2012
2013 mountlist_append(mp);
2014
2015 error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
2016 mp->mnt_op->vfs_name, mp, curlwp);
2017 if (error)
2018 panic("set_statvfs_info failed for rootfs: %d", error);
2019
2020 mp->mnt_flag &= ~MNT_RDONLY;
2021 vfs_unbusy(mp);
2022
2023 return 0;
2024 }
2025