rumpfs.c revision 1.128 1 /* $NetBSD: rumpfs.c,v 1.128 2014/05/28 20:57:22 justin Exp $ */
2
3 /*
4 * Copyright (c) 2009, 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.128 2014/05/28 20:57:22 justin Exp $");
30
31 #include <sys/param.h>
32 #include <sys/atomic.h>
33 #include <sys/buf.h>
34 #include <sys/dirent.h>
35 #include <sys/errno.h>
36 #include <sys/filedesc.h>
37 #include <sys/fcntl.h>
38 #include <sys/kauth.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/lock.h>
44 #include <sys/lockf.h>
45 #include <sys/queue.h>
46 #include <sys/stat.h>
47 #include <sys/syscallargs.h>
48 #include <sys/vnode.h>
49 #include <sys/unistd.h>
50
51 #include <miscfs/fifofs/fifo.h>
52 #include <miscfs/specfs/specdev.h>
53 #include <miscfs/genfs/genfs.h>
54 #include <miscfs/genfs/genfs_node.h>
55
56 #include <uvm/uvm_extern.h>
57
58 #include <rump/rumpuser.h>
59
60 #include "rump_private.h"
61 #include "rump_vfs_private.h"
62
/*
 * Forward declarations of the rumpfs vnode operations.  They are
 * referenced from the operations vectors below; each one receives its
 * arguments as an opaque "void *" argument block.
 */
static int rump_vop_lookup(void *);
static int rump_vop_getattr(void *);
static int rump_vop_setattr(void *);
static int rump_vop_mkdir(void *);
static int rump_vop_rmdir(void *);
static int rump_vop_remove(void *);
static int rump_vop_mknod(void *);
static int rump_vop_create(void *);
static int rump_vop_inactive(void *);
static int rump_vop_reclaim(void *);
static int rump_vop_success(void *);
static int rump_vop_readdir(void *);
static int rump_vop_spec(void *);
static int rump_vop_read(void *);
static int rump_vop_write(void *);
static int rump_vop_open(void *);
static int rump_vop_symlink(void *);
static int rump_vop_readlink(void *);
static int rump_vop_whiteout(void *);
static int rump_vop_pathconf(void *);
static int rump_vop_bmap(void *);
static int rump_vop_strategy(void *);
static int rump_vop_advlock(void *);
static int rump_vop_access(void *);
87
/*
 * Operations vector for fifos: putpages is routed to
 * genfs_null_putpages, every other operation ends up in
 * vn_default_error.
 */
int (**fifo_vnodeop_p)(void *);
const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_putpages_desc, genfs_null_putpages },
	{ NULL, NULL }
};
const struct vnodeopv_desc fifo_vnodeop_opv_desc =
	{ &fifo_vnodeop_p, fifo_vnodeop_entries };
96
/*
 * Operations vector for regular rumpfs vnodes (makevnode() uses it for
 * everything except VCHR/VBLK).  Operations not listed here fall
 * through to vn_default_error; hard links are explicitly unsupported
 * (genfs_eopnotsupp).
 */
int (**rump_vnodeop_p)(void *);
const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, rump_vop_lookup },
	{ &vop_getattr_desc, rump_vop_getattr },
	{ &vop_setattr_desc, rump_vop_setattr },
	{ &vop_mkdir_desc, rump_vop_mkdir },
	{ &vop_rmdir_desc, rump_vop_rmdir },
	{ &vop_remove_desc, rump_vop_remove },
	{ &vop_mknod_desc, rump_vop_mknod },
	{ &vop_create_desc, rump_vop_create },
	{ &vop_symlink_desc, rump_vop_symlink },
	{ &vop_readlink_desc, rump_vop_readlink },
	{ &vop_access_desc, rump_vop_access },
	{ &vop_readdir_desc, rump_vop_readdir },
	{ &vop_read_desc, rump_vop_read },
	{ &vop_write_desc, rump_vop_write },
	{ &vop_open_desc, rump_vop_open },
	{ &vop_close_desc, genfs_nullop },
	{ &vop_seek_desc, genfs_seek },
	{ &vop_getpages_desc, genfs_getpages },
	{ &vop_putpages_desc, genfs_putpages },
	{ &vop_whiteout_desc, rump_vop_whiteout },
	{ &vop_fsync_desc, rump_vop_success },
	{ &vop_lock_desc, genfs_lock },
	{ &vop_unlock_desc, genfs_unlock },
	{ &vop_islocked_desc, genfs_islocked },
	{ &vop_inactive_desc, rump_vop_inactive },
	{ &vop_reclaim_desc, rump_vop_reclaim },
	{ &vop_link_desc, genfs_eopnotsupp },
	{ &vop_pathconf_desc, rump_vop_pathconf },
	{ &vop_bmap_desc, rump_vop_bmap },
	{ &vop_strategy_desc, rump_vop_strategy },
	{ &vop_advlock_desc, rump_vop_advlock },
	{ NULL, NULL }
};
const struct vnodeopv_desc rump_vnodeop_opv_desc =
	{ &rump_vnodeop_p, rump_vnodeop_entries };
135
/*
 * Operations vector for device nodes (makevnode() selects it for VCHR
 * and VBLK): every operation is handed to rump_vop_spec.
 */
int (**rump_specop_p)(void *);
const struct vnodeopv_entry_desc rump_specop_entries[] = {
	{ &vop_default_desc, rump_vop_spec },
	{ NULL, NULL }
};
const struct vnodeopv_desc rump_specop_opv_desc =
	{ &rump_specop_p, rump_specop_entries };

/* NULL-terminated list of the operations vector descriptions of rumpfs */
const struct vnodeopv_desc * const rump_opv_descs[] = {
	&rump_vnodeop_opv_desc,
	&rump_specop_opv_desc,
	NULL
};
149
/*
 * Directory entry.  rd_node points to the node the name refers to, or
 * equals RUMPFS_WHITEOUT if the entry is a whiteout.
 *
 * RDENT_ISWHITEOUT() parenthesises its argument so that it can be used
 * with arbitrary pointer expressions, not just plain identifiers.
 */
#define RUMPFS_WHITEOUT ((void *)-1)
#define RDENT_ISWHITEOUT(rdp) ((rdp)->rd_node == RUMPFS_WHITEOUT)
struct rumpfs_dent {
	char *rd_name;			/* NUL-terminated entry name */
	int rd_namelen;			/* strlen(rd_name) */
	struct rumpfs_node *rd_node;	/* target node or RUMPFS_WHITEOUT */

	LIST_ENTRY(rumpfs_dent) rd_entries;
};
159
/*
 * genfs callbacks for rumpfs vnodes; installed on every vnode by
 * makevnode() via genfs_node_init().  Only the generic size and write
 * hooks are provided.
 */
struct genfs_ops rumpfs_genfsops = {
	.gop_size = genfs_size,
	.gop_write = genfs_gop_write,

	/* optional */
	.gop_alloc = NULL,
	.gop_markupdate = NULL,
};
168
/*
 * Per-file private data of rumpfs; hangs off vnode->v_data once a
 * vnode has been created for the node.  The union member in use
 * depends on the file type and on whether the node is etfs-backed.
 */
struct rumpfs_node {
	struct genfs_node rn_gn;
	struct vattr rn_va;		/* attributes, see makeprivate() */
	struct vnode *rn_vp;		/* vnode, or NULL; see reclock */
	char *rn_hostpath;		/* host path for etfs-backed nodes */
	int rn_flags;			/* RUMPNODE_* */
	struct lockf *rn_lockf;

	union {
		struct {		/* VREG */
			int readfd;	/* host fd for reading, -1: not open */
			int writefd;	/* host fd for writing, -1: not open */
			uint64_t offset;
		} reg;
		struct {		/* data buffer of size dlen */
			void *data;
			size_t dlen;
		} reg_noet;
		struct {		/* VDIR */
			LIST_HEAD(, rumpfs_dent) dents;
			struct rumpfs_node *parent;
			int flags;
		} dir;
		struct {		/* symlink target and its length */
			char *target;
			size_t len;
		} link;
	} rn_u;
};
/* shorthands for the union members above */
#define rn_readfd	rn_u.reg.readfd
#define rn_writefd	rn_u.reg.writefd
#define rn_offset	rn_u.reg.offset
#define rn_data		rn_u.reg_noet.data
#define rn_dlen		rn_u.reg_noet.dlen
#define rn_dir		rn_u.dir.dents
#define rn_parent	rn_u.dir.parent
#define rn_linktarg	rn_u.link.target
#define rn_linklen	rn_u.link.len
207
/* values for rn_flags */
#define RUMPNODE_CANRECLAIM	0x01	/* set on removal and on etfs dir children */
#define RUMPNODE_DIR_ET		0x02	/* directory lookups go to the host */
#define RUMPNODE_DIR_ETSUBS	0x04	/* children inherit the DIR_ET flags */
#define RUMPNODE_ET_PHONE_HOST	0x10	/* node is backed by a host file */

/*
 * Per-mount private data.
 * NOTE(review): rfsmp_rvp is presumably the root vnode of the mount;
 * its users are not visible in this part of the file.
 */
struct rumpfs_mount {
	struct vnode *rfsmp_rvp;
};
216
217 #define INO_WHITEOUT 1
218 static int lastino = 2;
219 static kmutex_t reclock;
220
221 #define RUMPFS_DEFAULTMODE 0755
222 static void freedir(struct rumpfs_node *, struct componentname *);
223 static struct rumpfs_node *makeprivate(enum vtype, mode_t, dev_t, off_t, bool);
224
/*
 * Extra Terrestrial stuff.  We map a given key (pathname) to a file on
 * the host FS.  ET phones home only from the root node of rumpfs.
 *
 * When an etfs node is removed, a vnode potentially behind it is not
 * immediately recycled.
 */

struct etfs {
	char et_key[MAXPATHLEN];	/* key without leading slashes */
	size_t et_keylen;		/* strlen(et_key) */
	bool et_prefixkey;		/* true for directory mappings */
	bool et_removing;		/* etfsremove() is in progress */
	devminor_t et_blkmin;		/* rumpblk minor, -1 if none */

	LIST_ENTRY(etfs) et_entries;

	struct rumpfs_node *et_rn;	/* node backing this mapping */
};
/* etfs_lock protects etfs_list */
static kmutex_t etfs_lock;
static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);
246
247 static enum vtype
248 ettype_to_vtype(enum rump_etfs_type et)
249 {
250 enum vtype vt;
251
252 switch (et) {
253 case RUMP_ETFS_REG:
254 vt = VREG;
255 break;
256 case RUMP_ETFS_BLK:
257 vt = VBLK;
258 break;
259 case RUMP_ETFS_CHR:
260 vt = VCHR;
261 break;
262 case RUMP_ETFS_DIR:
263 vt = VDIR;
264 break;
265 case RUMP_ETFS_DIR_SUBDIRS:
266 vt = VDIR;
267 break;
268 default:
269 panic("invalid et type: %d", et);
270 }
271
272 return vt;
273 }
274
275 static enum vtype
276 hft_to_vtype(int hft)
277 {
278 enum vtype vt;
279
280 switch (hft) {
281 case RUMPUSER_FT_OTHER:
282 vt = VNON;
283 break;
284 case RUMPUSER_FT_DIR:
285 vt = VDIR;
286 break;
287 case RUMPUSER_FT_REG:
288 vt = VREG;
289 break;
290 case RUMPUSER_FT_BLK:
291 vt = VBLK;
292 break;
293 case RUMPUSER_FT_CHR:
294 vt = VCHR;
295 break;
296 default:
297 vt = VNON;
298 break;
299 }
300
301 return vt;
302 }
303
304 static bool
305 etfs_find(const char *key, struct etfs **etp, bool forceprefix)
306 {
307 struct etfs *et;
308 size_t keylen = strlen(key);
309
310 KASSERT(mutex_owned(&etfs_lock));
311
312 LIST_FOREACH(et, &etfs_list, et_entries) {
313 if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
314 && strncmp(key, et->et_key, et->et_keylen) == 0) {
315 if (etp)
316 *etp = et;
317 return true;
318 }
319 }
320
321 return false;
322 }
323
324 #define REGDIR(ftype) \
325 ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
326 static int
327 etfsregister(const char *key, const char *hostpath,
328 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
329 {
330 char buf[9];
331 struct etfs *et;
332 struct rumpfs_node *rn;
333 uint64_t fsize;
334 dev_t rdev = NODEV;
335 devminor_t dmin = -1;
336 int hft, error;
337
338 if (key[0] != '/') {
339 return EINVAL;
340 }
341 while (key[0] == '/') {
342 key++;
343 }
344
345 if ((error = rumpuser_getfileinfo(hostpath, &fsize, &hft)) != 0)
346 return error;
347
348 /* etfs directory requires a directory on the host */
349 if (REGDIR(ftype)) {
350 if (hft != RUMPUSER_FT_DIR)
351 return ENOTDIR;
352 if (begin != 0)
353 return EISDIR;
354 if (size != RUMP_ETFS_SIZE_ENDOFF)
355 return EISDIR;
356 size = fsize;
357 } else {
358 if (begin > fsize)
359 return EINVAL;
360 if (size == RUMP_ETFS_SIZE_ENDOFF)
361 size = fsize - begin;
362 if (begin + size > fsize)
363 return EINVAL;
364 }
365
366 if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
367 error = rumpblk_register(hostpath, &dmin, begin, size);
368 if (error != 0) {
369 return error;
370 }
371 rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
372 }
373
374 et = kmem_alloc(sizeof(*et), KM_SLEEP);
375 strcpy(et->et_key, key);
376 et->et_keylen = strlen(et->et_key);
377 et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), RUMPFS_DEFAULTMODE,
378 rdev, size, true);
379 et->et_removing = false;
380 et->et_blkmin = dmin;
381
382 rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
383
384 if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
385 size_t len = strlen(hostpath)+1;
386
387 rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
388 memcpy(rn->rn_hostpath, hostpath, len);
389 rn->rn_offset = begin;
390 }
391
392 if (REGDIR(ftype)) {
393 rn->rn_flags |= RUMPNODE_DIR_ET;
394 et->et_prefixkey = true;
395 } else {
396 et->et_prefixkey = false;
397 }
398
399 if (ftype == RUMP_ETFS_DIR_SUBDIRS)
400 rn->rn_flags |= RUMPNODE_DIR_ETSUBS;
401
402 mutex_enter(&etfs_lock);
403 if (etfs_find(key, NULL, REGDIR(ftype))) {
404 mutex_exit(&etfs_lock);
405 if (et->et_blkmin != -1)
406 rumpblk_deregister(hostpath);
407 if (et->et_rn->rn_hostpath != NULL)
408 free(et->et_rn->rn_hostpath, M_TEMP);
409 kmem_free(et->et_rn, sizeof(*et->et_rn));
410 kmem_free(et, sizeof(*et));
411 return EEXIST;
412 }
413 LIST_INSERT_HEAD(&etfs_list, et, et_entries);
414 mutex_exit(&etfs_lock);
415
416 if (ftype == RUMP_ETFS_BLK) {
417 format_bytes(buf, sizeof(buf), size);
418 aprint_verbose("/%s: hostpath %s (%s)\n", key, hostpath, buf);
419 }
420
421 return 0;
422 }
423 #undef REGDIR
424
/*
 * Remove the etfs mapping registered under key (which must start with
 * '/').  It is the caller's responsibility to make sure the mapping is
 * not in use.
 *
 * Returns EINVAL for a key which is not absolute, ENOENT if there is
 * no exactly matching mapping or if it is already being removed, and
 * 0 on success.
 */
static int
etfsremove(const char *key)
{
	struct etfs *et;
	size_t keylen;
	int rv __diagused;

	if (key[0] != '/') {
		return EINVAL;
	}
	while (key[0] == '/') {
		key++;
	}

	keylen = strlen(key);

	/* find the entry and claim it for removal */
	mutex_enter(&etfs_lock);
	LIST_FOREACH(et, &etfs_list, et_entries) {
		if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
			if (et->et_removing)
				et = NULL;
			else
				et->et_removing = true;
			break;
		}
	}
	mutex_exit(&etfs_lock);
	/* no match, or somebody else is already removing the entry */
	if (!et)
		return ENOENT;

	/*
	 * ok, we know what we want to remove and have signalled there
	 * actually are men at work.  first, unregister from rumpblk
	 */
	if (et->et_blkmin != -1) {
		rv = rumpblk_deregister(et->et_rn->rn_hostpath);
	} else {
		rv = 0;
	}
	KASSERT(rv == 0);

	/* then do the actual removal */
	mutex_enter(&etfs_lock);
	LIST_REMOVE(et, et_entries);
	mutex_exit(&etfs_lock);

	/* node is unreachable, safe to nuke all device copies */
	if (et->et_blkmin != -1) {
		vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);
	} else {
		struct vnode *vp;

		/*
		 * rn_vp is protected by reclock; take the vnode
		 * interlock before dropping reclock and calling vget().
		 */
		mutex_enter(&reclock);
		if ((vp = et->et_rn->rn_vp) != NULL)
			mutex_enter(vp->v_interlock);
		mutex_exit(&reclock);
		if (vp && vget(vp, 0) == 0)
			vgone(vp);
	}

	/* finally release the node and the mapping itself */
	if (et->et_rn->rn_hostpath != NULL)
		free(et->et_rn->rn_hostpath, M_TEMP);
	kmem_free(et->et_rn, sizeof(*et->et_rn));
	kmem_free(et, sizeof(*et));

	return 0;
}
493
494 /*
495 * rumpfs
496 */
497
498 static struct rumpfs_node *
499 makeprivate(enum vtype vt, mode_t mode, dev_t rdev, off_t size, bool et)
500 {
501 struct rumpfs_node *rn;
502 struct vattr *va;
503 struct timespec ts;
504
505 KASSERT((mode & ~ALLPERMS) == 0);
506 rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);
507
508 switch (vt) {
509 case VDIR:
510 LIST_INIT(&rn->rn_dir);
511 break;
512 case VREG:
513 if (et) {
514 rn->rn_readfd = -1;
515 rn->rn_writefd = -1;
516 }
517 break;
518 default:
519 break;
520 }
521
522 nanotime(&ts);
523
524 va = &rn->rn_va;
525 va->va_type = vt;
526 va->va_mode = mode;
527 if (vt == VDIR)
528 va->va_nlink = 2;
529 else
530 va->va_nlink = 1;
531 va->va_uid = 0;
532 va->va_gid = 0;
533 va->va_fsid =
534 va->va_fileid = atomic_inc_uint_nv(&lastino);
535 va->va_size = size;
536 va->va_blocksize = 512;
537 va->va_atime = ts;
538 va->va_mtime = ts;
539 va->va_ctime = ts;
540 va->va_birthtime = ts;
541 va->va_gen = 0;
542 va->va_flags = 0;
543 va->va_rdev = rdev;
544 va->va_bytes = 512;
545 va->va_filerev = 0;
546 va->va_vaflags = 0;
547
548 return rn;
549 }
550
551 static int
552 makevnode(struct mount *mp, struct rumpfs_node *rn, struct vnode **vpp)
553 {
554 struct vnode *vp;
555 int (**vpops)(void *);
556 struct vattr *va = &rn->rn_va;
557 int rv;
558
559 KASSERT(!mutex_owned(&reclock));
560
561 if (va->va_type == VCHR || va->va_type == VBLK) {
562 vpops = rump_specop_p;
563 } else {
564 vpops = rump_vnodeop_p;
565 }
566
567 rv = getnewvnode(VT_RUMP, mp, vpops, NULL, &vp);
568 if (rv)
569 return rv;
570
571 vp->v_size = vp->v_writesize = va->va_size;
572 vp->v_type = va->va_type;
573
574 if (vpops == rump_specop_p) {
575 spec_node_init(vp, va->va_rdev);
576 }
577 vp->v_data = rn;
578
579 genfs_node_init(vp, &rumpfs_genfsops);
580 mutex_enter(&reclock);
581 rn->rn_vp = vp;
582 mutex_exit(&reclock);
583
584 *vpp = vp;
585
586 return 0;
587 }
588
589
590 static void
591 makedir(struct rumpfs_node *rnd,
592 struct componentname *cnp, struct rumpfs_node *rn)
593 {
594 struct rumpfs_dent *rdent;
595
596 rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
597 rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
598 rdent->rd_node = rn;
599 strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
600 rdent->rd_namelen = strlen(rdent->rd_name);
601
602 if ((cnp->cn_flags & ISWHITEOUT) != 0) {
603 KASSERT((cnp->cn_flags & DOWHITEOUT) == 0);
604 freedir(rnd, cnp);
605 }
606 LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
607 }
608
609 static void
610 freedir(struct rumpfs_node *rnd, struct componentname *cnp)
611 {
612 struct rumpfs_dent *rd = NULL;
613
614 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
615 if (rd->rd_namelen == cnp->cn_namelen &&
616 strncmp(rd->rd_name, cnp->cn_nameptr,
617 cnp->cn_namelen) == 0)
618 break;
619 }
620 if (rd == NULL)
621 panic("could not find directory entry: %s", cnp->cn_nameptr);
622
623 if (cnp->cn_flags & DOWHITEOUT) {
624 rd->rd_node = RUMPFS_WHITEOUT;
625 } else {
626 LIST_REMOVE(rd, rd_entries);
627 kmem_free(rd->rd_name, rd->rd_namelen+1);
628 kmem_free(rd, sizeof(*rd));
629 }
630 }
631
632 /*
633 * Simple lookup for rump file systems.
634 *
635 * uhm, this is twisted. C F C C, hope of C C F C looming
636 */
637 static int
638 rump_vop_lookup(void *v)
639 {
640 struct vop_lookup_v2_args /* {
641 struct vnode *a_dvp;
642 struct vnode **a_vpp;
643 struct componentname *a_cnp;
644 }; */ *ap = v;
645 struct componentname *cnp = ap->a_cnp;
646 struct vnode *dvp = ap->a_dvp;
647 struct vnode **vpp = ap->a_vpp;
648 struct vnode *vp;
649 struct rumpfs_node *rnd = dvp->v_data, *rn;
650 struct rumpfs_dent *rd = NULL;
651 struct etfs *et;
652 bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
653 int rv = 0;
654 const char *cp;
655
656 *vpp = NULL;
657
658 rv = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
659 if (rv)
660 return rv;
661
662 if ((cnp->cn_flags & ISLASTCN)
663 && (dvp->v_mount->mnt_flag & MNT_RDONLY)
664 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
665 return EROFS;
666
667 /* check for dot, return directly if the case */
668 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
669 vref(dvp);
670 *vpp = dvp;
671 return 0;
672 }
673
674 /* we don't do rename */
675 if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
676 return EOPNOTSUPP;
677
678 /* check for etfs */
679 if (dvp == rootvnode &&
680 (cnp->cn_nameiop == LOOKUP || cnp->cn_nameiop == CREATE)) {
681 bool found;
682 mutex_enter(&etfs_lock);
683 found = etfs_find(cnp->cn_nameptr, &et, false);
684 mutex_exit(&etfs_lock);
685
686 if (found) {
687 rn = et->et_rn;
688 cnp->cn_consume += et->et_keylen - cnp->cn_namelen;
689 /*
690 * consume trailing slashes if any and clear
691 * REQUIREDIR if we consumed the full path.
692 */
693 cp = &cnp->cn_nameptr[cnp->cn_namelen];
694 cp += cnp->cn_consume;
695 KASSERT(*cp == '\0' || *cp == '/');
696 if (*cp == '\0' && rn->rn_va.va_type != VDIR)
697 cnp->cn_flags &= ~REQUIREDIR;
698 while (*cp++ == '/')
699 cnp->cn_consume++;
700 goto getvnode;
701 }
702 }
703
704 if (rnd->rn_flags & RUMPNODE_DIR_ET) {
705 uint64_t fsize;
706 char *newpath;
707 size_t newpathlen;
708 int hft, error;
709
710 if (dotdot)
711 return EOPNOTSUPP;
712
713 newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
714 newpath = malloc(newpathlen, M_TEMP, M_WAITOK);
715
716 strlcpy(newpath, rnd->rn_hostpath, newpathlen);
717 strlcat(newpath, "/", newpathlen);
718 strlcat(newpath, cnp->cn_nameptr, newpathlen);
719
720 if ((error = rumpuser_getfileinfo(newpath, &fsize, &hft)) != 0){
721 free(newpath, M_TEMP);
722 return error;
723 }
724
725 /* allow only dirs and regular files */
726 if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
727 free(newpath, M_TEMP);
728 return ENOENT;
729 }
730
731 rn = makeprivate(hft_to_vtype(hft), RUMPFS_DEFAULTMODE,
732 NODEV, fsize, true);
733 rn->rn_flags |= RUMPNODE_CANRECLAIM;
734 if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
735 rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
736 rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
737 }
738 rn->rn_hostpath = newpath;
739
740 goto getvnode;
741 } else {
742 if (dotdot) {
743 if ((rn = rnd->rn_parent) != NULL)
744 goto getvnode;
745 } else {
746 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
747 if (rd->rd_namelen == cnp->cn_namelen &&
748 strncmp(rd->rd_name, cnp->cn_nameptr,
749 cnp->cn_namelen) == 0)
750 break;
751 }
752 }
753 }
754
755 if (!rd && ((cnp->cn_flags & ISLASTCN) == 0||cnp->cn_nameiop != CREATE))
756 return ENOENT;
757
758 if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
759 if (dvp->v_mount->mnt_flag & MNT_RDONLY)
760 return EROFS;
761 rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
762 if (rv)
763 return rv;
764 return EJUSTRETURN;
765 }
766
767 if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == DELETE) {
768 rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
769 if (rv)
770 return rv;
771 }
772
773 if (RDENT_ISWHITEOUT(rd)) {
774 cnp->cn_flags |= ISWHITEOUT;
775 if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE)
776 return EJUSTRETURN;
777 return ENOENT;
778 }
779
780 rn = rd->rd_node;
781
782 getvnode:
783 KASSERT(rn);
784 mutex_enter(&reclock);
785 if ((vp = rn->rn_vp)) {
786 mutex_enter(vp->v_interlock);
787 mutex_exit(&reclock);
788 if (vget(vp, 0)) {
789 goto getvnode;
790 }
791 *vpp = vp;
792 } else {
793 mutex_exit(&reclock);
794 rv = makevnode(dvp->v_mount, rn, vpp);
795 }
796
797 return rv;
798 }
799
800 static int
801 rump_check_possible(struct vnode *vp, struct rumpfs_node *rnode,
802 mode_t mode)
803 {
804
805 if ((mode & VWRITE) == 0)
806 return 0;
807
808 switch (vp->v_type) {
809 case VDIR:
810 case VLNK:
811 case VREG:
812 break;
813 default:
814 /* special file is always writable. */
815 return 0;
816 }
817
818 return vp->v_mount->mnt_flag & MNT_RDONLY ? EROFS : 0;
819 }
820
821 static int
822 rump_check_permitted(struct vnode *vp, struct rumpfs_node *rnode,
823 mode_t mode, kauth_cred_t cred)
824 {
825 struct vattr *attr = &rnode->rn_va;
826
827 return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
828 vp->v_type, attr->va_mode), vp, NULL, genfs_can_access(vp->v_type,
829 attr->va_mode, attr->va_uid, attr->va_gid, mode, cred));
830 }
831
832 int
833 rump_vop_access(void *v)
834 {
835 struct vop_access_args /* {
836 const struct vnodeop_desc *a_desc;
837 struct vnode *a_vp;
838 int a_mode;
839 kauth_cred_t a_cred;
840 } */ *ap = v;
841 struct vnode *vp = ap->a_vp;
842 struct rumpfs_node *rn = vp->v_data;
843 int error;
844
845 error = rump_check_possible(vp, rn, ap->a_mode);
846 if (error)
847 return error;
848
849 error = rump_check_permitted(vp, rn, ap->a_mode, ap->a_cred);
850
851 return error;
852 }
853
854 static int
855 rump_vop_getattr(void *v)
856 {
857 struct vop_getattr_args /* {
858 struct vnode *a_vp;
859 struct vattr *a_vap;
860 kauth_cred_t a_cred;
861 } */ *ap = v;
862 struct vnode *vp = ap->a_vp;
863 struct rumpfs_node *rn = vp->v_data;
864 struct vattr *vap = ap->a_vap;
865
866 memcpy(vap, &rn->rn_va, sizeof(struct vattr));
867 vap->va_size = vp->v_size;
868 return 0;
869 }
870
871 static int
872 rump_vop_setattr(void *v)
873 {
874 struct vop_setattr_args /* {
875 struct vnode *a_vp;
876 struct vattr *a_vap;
877 kauth_cred_t a_cred;
878 } */ *ap = v;
879 struct vnode *vp = ap->a_vp;
880 struct vattr *vap = ap->a_vap;
881 struct rumpfs_node *rn = vp->v_data;
882 struct vattr *attr = &rn->rn_va;
883 kauth_cred_t cred = ap->a_cred;
884 int error;
885
886 #define CHANGED(a, t) (vap->a != (t)VNOVAL)
887 #define SETIFVAL(a,t) if (CHANGED(a, t)) rn->rn_va.a = vap->a
888 if (CHANGED(va_atime.tv_sec, time_t) ||
889 CHANGED(va_ctime.tv_sec, time_t) ||
890 CHANGED(va_mtime.tv_sec, time_t) ||
891 CHANGED(va_birthtime.tv_sec, time_t) ||
892 CHANGED(va_atime.tv_nsec, long) ||
893 CHANGED(va_ctime.tv_nsec, long) ||
894 CHANGED(va_mtime.tv_nsec, long) ||
895 CHANGED(va_birthtime.tv_nsec, long)) {
896 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp,
897 NULL, genfs_can_chtimes(vp, vap->va_vaflags, attr->va_uid,
898 cred));
899 if (error)
900 return error;
901 }
902
903 SETIFVAL(va_atime.tv_sec, time_t);
904 SETIFVAL(va_ctime.tv_sec, time_t);
905 SETIFVAL(va_mtime.tv_sec, time_t);
906 SETIFVAL(va_birthtime.tv_sec, time_t);
907 SETIFVAL(va_atime.tv_nsec, long);
908 SETIFVAL(va_ctime.tv_nsec, long);
909 SETIFVAL(va_mtime.tv_nsec, long);
910 SETIFVAL(va_birthtime.tv_nsec, long);
911
912 if (CHANGED(va_flags, u_long)) {
913 /* XXX Can we handle system flags here...? */
914 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_FLAGS, vp,
915 NULL, genfs_can_chflags(cred, vp->v_type, attr->va_uid,
916 false));
917 if (error)
918 return error;
919 }
920
921 SETIFVAL(va_flags, u_long);
922 #undef SETIFVAL
923 #undef CHANGED
924
925 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (uid_t)VNOVAL) {
926 uid_t uid =
927 (vap->va_uid != (uid_t)VNOVAL) ? vap->va_uid : attr->va_uid;
928 gid_t gid =
929 (vap->va_gid != (gid_t)VNOVAL) ? vap->va_gid : attr->va_gid;
930 error = kauth_authorize_vnode(cred,
931 KAUTH_VNODE_CHANGE_OWNERSHIP, vp, NULL,
932 genfs_can_chown(cred, attr->va_uid, attr->va_gid, uid,
933 gid));
934 if (error)
935 return error;
936 attr->va_uid = uid;
937 attr->va_gid = gid;
938 }
939
940 if (vap->va_mode != (mode_t)VNOVAL) {
941 mode_t mode = vap->va_mode;
942 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY,
943 vp, NULL, genfs_can_chmod(vp->v_type, cred, attr->va_uid,
944 attr->va_gid, mode));
945 if (error)
946 return error;
947 attr->va_mode = mode;
948 }
949
950 if (vp->v_type == VREG &&
951 vap->va_size != VSIZENOTSET &&
952 vap->va_size != rn->rn_dlen &&
953 (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0) {
954 void *newdata;
955 size_t copylen, newlen;
956
957 newlen = vap->va_size;
958 newdata = rump_hypermalloc(newlen, 0, false, "rumpfs");
959 if (newdata == NULL)
960 return ENOSPC;
961
962 copylen = MIN(rn->rn_dlen, newlen);
963 memset(newdata, 0, newlen);
964 memcpy(newdata, rn->rn_data, copylen);
965 rump_hyperfree(rn->rn_data, rn->rn_dlen);
966
967 rn->rn_data = newdata;
968 rn->rn_dlen = newlen;
969 uvm_vnp_setsize(vp, newlen);
970 }
971 return 0;
972 }
973
974 static int
975 rump_vop_mkdir(void *v)
976 {
977 struct vop_mkdir_v3_args /* {
978 struct vnode *a_dvp;
979 struct vnode **a_vpp;
980 struct componentname *a_cnp;
981 struct vattr *a_vap;
982 }; */ *ap = v;
983 struct vnode *dvp = ap->a_dvp;
984 struct vnode **vpp = ap->a_vpp;
985 struct componentname *cnp = ap->a_cnp;
986 struct vattr *va = ap->a_vap;
987 struct rumpfs_node *rnd = dvp->v_data, *rn;
988 int rv = 0;
989
990 rn = makeprivate(VDIR, va->va_mode & ALLPERMS, NODEV, DEV_BSIZE, false);
991 if ((cnp->cn_flags & ISWHITEOUT) != 0)
992 rn->rn_va.va_flags |= UF_OPAQUE;
993 rn->rn_parent = rnd;
994 rv = makevnode(dvp->v_mount, rn, vpp);
995 if (rv)
996 return rv;
997
998 makedir(rnd, cnp, rn);
999
1000 return rv;
1001 }
1002
1003 static int
1004 rump_vop_rmdir(void *v)
1005 {
1006 struct vop_rmdir_args /* {
1007 struct vnode *a_dvp;
1008 struct vnode *a_vp;
1009 struct componentname *a_cnp;
1010 }; */ *ap = v;
1011 struct vnode *dvp = ap->a_dvp;
1012 struct vnode *vp = ap->a_vp;
1013 struct componentname *cnp = ap->a_cnp;
1014 struct rumpfs_node *rnd = dvp->v_data;
1015 struct rumpfs_node *rn = vp->v_data;
1016 struct rumpfs_dent *rd;
1017 int rv = 0;
1018
1019 LIST_FOREACH(rd, &rn->rn_dir, rd_entries) {
1020 if (rd->rd_node != RUMPFS_WHITEOUT) {
1021 rv = ENOTEMPTY;
1022 goto out;
1023 }
1024 }
1025 while ((rd = LIST_FIRST(&rn->rn_dir)) != NULL) {
1026 KASSERT(rd->rd_node == RUMPFS_WHITEOUT);
1027 LIST_REMOVE(rd, rd_entries);
1028 kmem_free(rd->rd_name, rd->rd_namelen+1);
1029 kmem_free(rd, sizeof(*rd));
1030 }
1031
1032 freedir(rnd, cnp);
1033 rn->rn_flags |= RUMPNODE_CANRECLAIM;
1034 rn->rn_parent = NULL;
1035
1036 out:
1037 vput(dvp);
1038 vput(vp);
1039
1040 return rv;
1041 }
1042
1043 static int
1044 rump_vop_remove(void *v)
1045 {
1046 struct vop_remove_args /* {
1047 struct vnode *a_dvp;
1048 struct vnode *a_vp;
1049 struct componentname *a_cnp;
1050 }; */ *ap = v;
1051 struct vnode *dvp = ap->a_dvp;
1052 struct vnode *vp = ap->a_vp;
1053 struct componentname *cnp = ap->a_cnp;
1054 struct rumpfs_node *rnd = dvp->v_data;
1055 struct rumpfs_node *rn = vp->v_data;
1056 int rv = 0;
1057
1058 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1059 return EOPNOTSUPP;
1060
1061 freedir(rnd, cnp);
1062 rn->rn_flags |= RUMPNODE_CANRECLAIM;
1063
1064 vput(dvp);
1065 vput(vp);
1066
1067 return rv;
1068 }
1069
1070 static int
1071 rump_vop_mknod(void *v)
1072 {
1073 struct vop_mknod_v3_args /* {
1074 struct vnode *a_dvp;
1075 struct vnode **a_vpp;
1076 struct componentname *a_cnp;
1077 struct vattr *a_vap;
1078 }; */ *ap = v;
1079 struct vnode *dvp = ap->a_dvp;
1080 struct vnode **vpp = ap->a_vpp;
1081 struct componentname *cnp = ap->a_cnp;
1082 struct vattr *va = ap->a_vap;
1083 struct rumpfs_node *rnd = dvp->v_data, *rn;
1084 int rv;
1085
1086 rn = makeprivate(va->va_type, va->va_mode & ALLPERMS, va->va_rdev,
1087 DEV_BSIZE, false);
1088 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1089 rn->rn_va.va_flags |= UF_OPAQUE;
1090 rv = makevnode(dvp->v_mount, rn, vpp);
1091 if (rv)
1092 return rv;
1093
1094 makedir(rnd, cnp, rn);
1095
1096 return rv;
1097 }
1098
1099 static int
1100 rump_vop_create(void *v)
1101 {
1102 struct vop_create_v3_args /* {
1103 struct vnode *a_dvp;
1104 struct vnode **a_vpp;
1105 struct componentname *a_cnp;
1106 struct vattr *a_vap;
1107 }; */ *ap = v;
1108 struct vnode *dvp = ap->a_dvp;
1109 struct vnode **vpp = ap->a_vpp;
1110 struct componentname *cnp = ap->a_cnp;
1111 struct vattr *va = ap->a_vap;
1112 struct rumpfs_node *rnd = dvp->v_data, *rn;
1113 off_t newsize;
1114 int rv;
1115
1116 newsize = va->va_type == VSOCK ? DEV_BSIZE : 0;
1117 rn = makeprivate(va->va_type, va->va_mode & ALLPERMS, NODEV,
1118 newsize, false);
1119 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1120 rn->rn_va.va_flags |= UF_OPAQUE;
1121 rv = makevnode(dvp->v_mount, rn, vpp);
1122 if (rv)
1123 return rv;
1124
1125 makedir(rnd, cnp, rn);
1126
1127 return rv;
1128 }
1129
1130 static int
1131 rump_vop_symlink(void *v)
1132 {
1133 struct vop_symlink_v3_args /* {
1134 struct vnode *a_dvp;
1135 struct vnode **a_vpp;
1136 struct componentname *a_cnp;
1137 struct vattr *a_vap;
1138 char *a_target;
1139 }; */ *ap = v;
1140 struct vnode *dvp = ap->a_dvp;
1141 struct vnode **vpp = ap->a_vpp;
1142 struct componentname *cnp = ap->a_cnp;
1143 struct vattr *va = ap->a_vap;
1144 struct rumpfs_node *rnd = dvp->v_data, *rn;
1145 const char *target = ap->a_target;
1146 size_t linklen;
1147 int rv;
1148
1149 linklen = strlen(target);
1150 KASSERT(linklen < MAXPATHLEN);
1151 rn = makeprivate(VLNK, va->va_mode & ALLPERMS, NODEV, linklen, false);
1152 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1153 rn->rn_va.va_flags |= UF_OPAQUE;
1154 rv = makevnode(dvp->v_mount, rn, vpp);
1155 if (rv)
1156 return rv;
1157
1158 makedir(rnd, cnp, rn);
1159
1160 KASSERT(linklen < MAXPATHLEN);
1161 rn->rn_linktarg = PNBUF_GET();
1162 rn->rn_linklen = linklen;
1163 strcpy(rn->rn_linktarg, target);
1164
1165 return rv;
1166 }
1167
1168 static int
1169 rump_vop_readlink(void *v)
1170 {
1171 struct vop_readlink_args /* {
1172 struct vnode *a_vp;
1173 struct uio *a_uio;
1174 kauth_cred_t a_cred;
1175 }; */ *ap = v;
1176 struct vnode *vp = ap->a_vp;
1177 struct rumpfs_node *rn = vp->v_data;
1178 struct uio *uio = ap->a_uio;
1179
1180 return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
1181 }
1182
1183 static int
1184 rump_vop_whiteout(void *v)
1185 {
1186 struct vop_whiteout_args /* {
1187 struct vnode *a_dvp;
1188 struct componentname *a_cnp;
1189 int a_flags;
1190 } */ *ap = v;
1191 struct vnode *dvp = ap->a_dvp;
1192 struct rumpfs_node *rnd = dvp->v_data;
1193 struct componentname *cnp = ap->a_cnp;
1194 int flags = ap->a_flags;
1195
1196 switch (flags) {
1197 case LOOKUP:
1198 break;
1199 case CREATE:
1200 makedir(rnd, cnp, RUMPFS_WHITEOUT);
1201 break;
1202 case DELETE:
1203 cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails ? */
1204 freedir(rnd, cnp);
1205 break;
1206 default:
1207 panic("unknown whiteout op %d", flags);
1208 }
1209
1210 return 0;
1211 }
1212
1213 static int
1214 rump_vop_open(void *v)
1215 {
1216 struct vop_open_args /* {
1217 struct vnode *a_vp;
1218 int a_mode;
1219 kauth_cred_t a_cred;
1220 } */ *ap = v;
1221 struct vnode *vp = ap->a_vp;
1222 struct rumpfs_node *rn = vp->v_data;
1223 int mode = ap->a_mode;
1224 int error = EINVAL;
1225
1226 if (vp->v_type != VREG || (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0)
1227 return 0;
1228
1229 if (mode & FREAD) {
1230 if (rn->rn_readfd != -1)
1231 return 0;
1232 error = rumpuser_open(rn->rn_hostpath,
1233 RUMPUSER_OPEN_RDONLY, &rn->rn_readfd);
1234 }
1235
1236 if (mode & FWRITE) {
1237 if (rn->rn_writefd != -1)
1238 return 0;
1239 error = rumpuser_open(rn->rn_hostpath,
1240 RUMPUSER_OPEN_WRONLY, &rn->rn_writefd);
1241 }
1242
1243 return error;
1244 }
1245
1246 /* simple readdir. even omits dotstuff and periods */
1247 static int
1248 rump_vop_readdir(void *v)
1249 {
1250 struct vop_readdir_args /* {
1251 struct vnode *a_vp;
1252 struct uio *a_uio;
1253 kauth_cred_t a_cred;
1254 int *a_eofflag;
1255 off_t **a_cookies;
1256 int *a_ncookies;
1257 } */ *ap = v;
1258 struct vnode *vp = ap->a_vp;
1259 struct uio *uio = ap->a_uio;
1260 struct rumpfs_node *rnd = vp->v_data;
1261 struct rumpfs_dent *rdent;
1262 struct dirent *dentp = NULL;
1263 unsigned i;
1264 int rv = 0;
1265
1266 /* seek to current entry */
1267 for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
1268 (i < uio->uio_offset) && rdent;
1269 i++, rdent = LIST_NEXT(rdent, rd_entries))
1270 continue;
1271 if (!rdent)
1272 goto out;
1273
1274 /* copy entries */
1275 dentp = kmem_alloc(sizeof(*dentp), KM_SLEEP);
1276 for (; rdent && uio->uio_resid > 0;
1277 rdent = LIST_NEXT(rdent, rd_entries), i++) {
1278 strlcpy(dentp->d_name, rdent->rd_name, sizeof(dentp->d_name));
1279 dentp->d_namlen = strlen(dentp->d_name);
1280 dentp->d_reclen = _DIRENT_RECLEN(dentp, dentp->d_namlen);
1281
1282 if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
1283 dentp->d_fileno = INO_WHITEOUT;
1284 dentp->d_type = DT_WHT;
1285 } else {
1286 dentp->d_fileno = rdent->rd_node->rn_va.va_fileid;
1287 dentp->d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
1288 }
1289
1290 if (uio->uio_resid < dentp->d_reclen) {
1291 i--;
1292 break;
1293 }
1294
1295 rv = uiomove(dentp, dentp->d_reclen, uio);
1296 if (rv) {
1297 i--;
1298 break;
1299 }
1300 }
1301 kmem_free(dentp, sizeof(*dentp));
1302 dentp = NULL;
1303
1304 out:
1305 KASSERT(dentp == NULL);
1306 if (ap->a_cookies) {
1307 *ap->a_ncookies = 0;
1308 *ap->a_cookies = NULL;
1309 }
1310 if (rdent)
1311 *ap->a_eofflag = 0;
1312 else
1313 *ap->a_eofflag = 1;
1314 uio->uio_offset = i;
1315
1316 return rv;
1317 }
1318
1319 static int
1320 etread(struct rumpfs_node *rn, struct uio *uio)
1321 {
1322 struct rumpuser_iovec iov;
1323 uint8_t *buf;
1324 size_t bufsize, n;
1325 int error = 0;
1326
1327 bufsize = uio->uio_resid;
1328 if (bufsize == 0)
1329 return 0;
1330 buf = kmem_alloc(bufsize, KM_SLEEP);
1331
1332 iov.iov_base = buf;
1333 iov.iov_len = bufsize;
1334 if ((error = rumpuser_iovread(rn->rn_readfd, &iov, 1,
1335 uio->uio_offset + rn->rn_offset, &n)) == 0) {
1336 KASSERT(n <= bufsize);
1337 error = uiomove(buf, n, uio);
1338 }
1339
1340 kmem_free(buf, bufsize);
1341 return error;
1342 }
1343
1344 static int
1345 rump_vop_read(void *v)
1346 {
1347 struct vop_read_args /* {
1348 struct vnode *a_vp;
1349 struct uio *a_uio;
1350 int ioflags a_ioflag;
1351 kauth_cred_t a_cred;
1352 }; */ *ap = v;
1353 struct vnode *vp = ap->a_vp;
1354 struct rumpfs_node *rn = vp->v_data;
1355 struct uio *uio = ap->a_uio;
1356 const int advice = IO_ADV_DECODE(ap->a_ioflag);
1357 off_t chunk;
1358 int error = 0;
1359
1360 if (vp->v_type == VDIR)
1361 return EISDIR;
1362
1363 /* et op? */
1364 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1365 return etread(rn, uio);
1366
1367 /* otherwise, it's off to ubc with us */
1368 while (uio->uio_resid > 0) {
1369 chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1370 if (chunk == 0)
1371 break;
1372 error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1373 UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1374 if (error)
1375 break;
1376 }
1377
1378 return error;
1379 }
1380
1381 static int
1382 etwrite(struct rumpfs_node *rn, struct uio *uio)
1383 {
1384 struct rumpuser_iovec iov;
1385 uint8_t *buf;
1386 size_t bufsize, n;
1387 int error = 0;
1388
1389 bufsize = uio->uio_resid;
1390 if (bufsize == 0)
1391 return 0;
1392 buf = kmem_alloc(bufsize, KM_SLEEP);
1393 error = uiomove(buf, bufsize, uio);
1394 if (error)
1395 goto out;
1396
1397 KASSERT(uio->uio_resid == 0);
1398 iov.iov_base = buf;
1399 iov.iov_len = bufsize;
1400 if ((error = rumpuser_iovwrite(rn->rn_writefd, &iov, 1,
1401 (uio->uio_offset-bufsize) + rn->rn_offset, &n)) == 0) {
1402 KASSERT(n <= bufsize);
1403 uio->uio_resid = bufsize - n;
1404 }
1405
1406 out:
1407 kmem_free(buf, bufsize);
1408 return error;
1409 }
1410
1411 static int
1412 rump_vop_write(void *v)
1413 {
1414 struct vop_write_args /* {
1415 struct vnode *a_vp;
1416 struct uio *a_uio;
1417 int ioflags a_ioflag;
1418 kauth_cred_t a_cred;
1419 }; */ *ap = v;
1420 struct vnode *vp = ap->a_vp;
1421 struct rumpfs_node *rn = vp->v_data;
1422 struct uio *uio = ap->a_uio;
1423 const int advice = IO_ADV_DECODE(ap->a_ioflag);
1424 void *olddata;
1425 size_t oldlen, newlen;
1426 off_t chunk;
1427 int error = 0;
1428 bool allocd = false;
1429
1430 if (ap->a_ioflag & IO_APPEND)
1431 uio->uio_offset = vp->v_size;
1432
1433 /* consult et? */
1434 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1435 return etwrite(rn, uio);
1436
1437 /*
1438 * Otherwise, it's a case of ubcmove.
1439 */
1440
1441 /*
1442 * First, make sure we have enough storage.
1443 *
1444 * No, you don't need to tell me it's not very efficient.
1445 * No, it doesn't really support sparse files, just fakes it.
1446 */
1447 newlen = uio->uio_offset + uio->uio_resid;
1448 oldlen = 0; /* XXXgcc */
1449 olddata = NULL;
1450 if (rn->rn_dlen < newlen) {
1451 oldlen = rn->rn_dlen;
1452 olddata = rn->rn_data;
1453
1454 rn->rn_data = rump_hypermalloc(newlen, 0, false, "rumpfs");
1455 if (rn->rn_data == NULL)
1456 return ENOSPC;
1457 rn->rn_dlen = newlen;
1458 memset(rn->rn_data, 0, newlen);
1459 memcpy(rn->rn_data, olddata, oldlen);
1460 allocd = true;
1461 uvm_vnp_setsize(vp, newlen);
1462 }
1463
1464 /* ok, we have enough stooorage. write */
1465 while (uio->uio_resid > 0) {
1466 chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1467 if (chunk == 0)
1468 break;
1469 error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1470 UBC_WRITE | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1471 if (error)
1472 break;
1473 }
1474
1475 if (allocd) {
1476 if (error) {
1477 rump_hyperfree(rn->rn_data, newlen);
1478 rn->rn_data = olddata;
1479 rn->rn_dlen = oldlen;
1480 uvm_vnp_setsize(vp, oldlen);
1481 } else {
1482 rump_hyperfree(olddata, oldlen);
1483 }
1484 }
1485
1486 return error;
1487 }
1488
1489 static int
1490 rump_vop_bmap(void *v)
1491 {
1492 struct vop_bmap_args /* {
1493 struct vnode *a_vp;
1494 daddr_t a_bn;
1495 struct vnode **a_vpp;
1496 daddr_t *a_bnp;
1497 int *a_runp;
1498 } */ *ap = v;
1499
1500 /* 1:1 mapping */
1501 if (ap->a_vpp)
1502 *ap->a_vpp = ap->a_vp;
1503 if (ap->a_bnp)
1504 *ap->a_bnp = ap->a_bn;
1505 if (ap->a_runp)
1506 *ap->a_runp = 16;
1507
1508 return 0;
1509 }
1510
1511 static int
1512 rump_vop_strategy(void *v)
1513 {
1514 struct vop_strategy_args /* {
1515 struct vnode *a_vp;
1516 struct buf *a_bp;
1517 } */ *ap = v;
1518 struct vnode *vp = ap->a_vp;
1519 struct rumpfs_node *rn = vp->v_data;
1520 struct buf *bp = ap->a_bp;
1521 off_t copylen, copyoff;
1522 int error;
1523
1524 if (vp->v_type != VREG || rn->rn_flags & RUMPNODE_ET_PHONE_HOST) {
1525 error = EINVAL;
1526 goto out;
1527 }
1528
1529 copyoff = bp->b_blkno << DEV_BSHIFT;
1530 copylen = MIN(rn->rn_dlen - copyoff, bp->b_bcount);
1531 if (BUF_ISWRITE(bp)) {
1532 memcpy((uint8_t *)rn->rn_data + copyoff, bp->b_data, copylen);
1533 } else {
1534 memset((uint8_t*)bp->b_data + copylen, 0, bp->b_bcount-copylen);
1535 memcpy(bp->b_data, (uint8_t *)rn->rn_data + copyoff, copylen);
1536 }
1537 bp->b_resid = 0;
1538 error = 0;
1539
1540 out:
1541 bp->b_error = error;
1542 biodone(bp);
1543 return 0;
1544 }
1545
1546 static int
1547 rump_vop_pathconf(void *v)
1548 {
1549 struct vop_pathconf_args /* {
1550 struct vnode *a_vp;
1551 int a_name;
1552 register_t *a_retval;
1553 }; */ *ap = v;
1554 int name = ap->a_name;
1555 register_t *retval = ap->a_retval;
1556
1557 switch (name) {
1558 case _PC_LINK_MAX:
1559 *retval = LINK_MAX;
1560 return 0;
1561 case _PC_NAME_MAX:
1562 *retval = RUMPFS_MAXNAMLEN;
1563 return 0;
1564 case _PC_PATH_MAX:
1565 *retval = PATH_MAX;
1566 return 0;
1567 case _PC_PIPE_BUF:
1568 *retval = PIPE_BUF;
1569 return 0;
1570 case _PC_CHOWN_RESTRICTED:
1571 *retval = 1;
1572 return 0;
1573 case _PC_NO_TRUNC:
1574 *retval = 1;
1575 return 0;
1576 case _PC_SYNC_IO:
1577 *retval = 1;
1578 return 0;
1579 case _PC_FILESIZEBITS:
1580 *retval = 43; /* this one goes to 11 */
1581 return 0;
1582 case _PC_SYMLINK_MAX:
1583 *retval = MAXPATHLEN;
1584 return 0;
1585 case _PC_2_SYMLINKS:
1586 *retval = 1;
1587 return 0;
1588 default:
1589 return EINVAL;
1590 }
1591 }
1592
/*
 * Vnode operation which ignores its argument, does nothing
 * and returns 0.
 */
static int
rump_vop_success(void *v)
{

	(void)v;
	return 0;
}
1599
1600 static int
1601 rump_vop_inactive(void *v)
1602 {
1603 struct vop_inactive_args /* {
1604 struct vnode *a_vp;
1605 bool *a_recycle;
1606 } */ *ap = v;
1607 struct vnode *vp = ap->a_vp;
1608 struct rumpfs_node *rn = vp->v_data;
1609
1610 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST && vp->v_type == VREG) {
1611 if (rn->rn_readfd != -1) {
1612 rumpuser_close(rn->rn_readfd);
1613 rn->rn_readfd = -1;
1614 }
1615 if (rn->rn_writefd != -1) {
1616 rumpuser_close(rn->rn_writefd);
1617 rn->rn_writefd = -1;
1618 }
1619 }
1620 *ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;
1621
1622 VOP_UNLOCK(vp);
1623 return 0;
1624 }
1625
1626 static int
1627 rump_vop_reclaim(void *v)
1628 {
1629 struct vop_reclaim_args /* {
1630 struct vnode *a_vp;
1631 } */ *ap = v;
1632 struct vnode *vp = ap->a_vp;
1633 struct rumpfs_node *rn = vp->v_data;
1634
1635 mutex_enter(&reclock);
1636 rn->rn_vp = NULL;
1637 mutex_exit(&reclock);
1638 genfs_node_destroy(vp);
1639 vp->v_data = NULL;
1640
1641 if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
1642 if (vp->v_type == VREG
1643 && (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0
1644 && rn->rn_data) {
1645 rump_hyperfree(rn->rn_data, rn->rn_dlen);
1646 rn->rn_data = NULL;
1647 }
1648
1649 if (vp->v_type == VLNK)
1650 PNBUF_PUT(rn->rn_linktarg);
1651 if (rn->rn_hostpath)
1652 free(rn->rn_hostpath, M_TEMP);
1653 kmem_free(rn, sizeof(*rn));
1654 }
1655
1656 return 0;
1657 }
1658
1659 static int
1660 rump_vop_spec(void *v)
1661 {
1662 struct vop_generic_args *ap = v;
1663 int (**opvec)(void *);
1664
1665 switch (ap->a_desc->vdesc_offset) {
1666 case VOP_ACCESS_DESCOFFSET:
1667 case VOP_GETATTR_DESCOFFSET:
1668 case VOP_SETATTR_DESCOFFSET:
1669 case VOP_LOCK_DESCOFFSET:
1670 case VOP_UNLOCK_DESCOFFSET:
1671 case VOP_ISLOCKED_DESCOFFSET:
1672 case VOP_RECLAIM_DESCOFFSET:
1673 opvec = rump_vnodeop_p;
1674 break;
1675 default:
1676 opvec = spec_vnodeop_p;
1677 break;
1678 }
1679
1680 return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
1681 }
1682
1683 static int
1684 rump_vop_advlock(void *v)
1685 {
1686 struct vop_advlock_args /* {
1687 const struct vnodeop_desc *a_desc;
1688 struct vnode *a_vp;
1689 void *a_id;
1690 int a_op;
1691 struct flock *a_fl;
1692 int a_flags;
1693 } */ *ap = v;
1694 struct vnode *vp = ap->a_vp;
1695 struct rumpfs_node *rn = vp->v_data;
1696
1697 return lf_advlock(ap, &rn->rn_lockf, vp->v_size);
1698 }
1699
1700 /*
1701 * Begin vfs-level stuff
1702 */
1703
1704 VFS_PROTOS(rumpfs);
1705 struct vfsops rumpfs_vfsops = {
1706 .vfs_name = MOUNT_RUMPFS,
1707 .vfs_min_mount_data = 0,
1708 .vfs_mount = rumpfs_mount,
1709 .vfs_start = (void *)nullop,
1710 .vfs_unmount = rumpfs_unmount,
1711 .vfs_root = rumpfs_root,
1712 .vfs_quotactl = (void *)eopnotsupp,
1713 .vfs_statvfs = genfs_statvfs,
1714 .vfs_sync = (void *)nullop,
1715 .vfs_vget = rumpfs_vget,
1716 .vfs_fhtovp = (void *)eopnotsupp,
1717 .vfs_vptofh = (void *)eopnotsupp,
1718 .vfs_init = rumpfs_init,
1719 .vfs_reinit = NULL,
1720 .vfs_done = rumpfs_done,
1721 .vfs_mountroot = rumpfs_mountroot,
1722 .vfs_snapshot = (void *)eopnotsupp,
1723 .vfs_extattrctl = (void *)eopnotsupp,
1724 .vfs_suspendctl = (void *)eopnotsupp,
1725 .vfs_renamelock_enter = genfs_renamelock_enter,
1726 .vfs_renamelock_exit = genfs_renamelock_exit,
1727 .vfs_opv_descs = rump_opv_descs,
1728 /* vfs_refcount */
1729 /* vfs_list */
1730 };
1731
1732 static int
1733 rumpfs_mountfs(struct mount *mp)
1734 {
1735 struct rumpfs_mount *rfsmp;
1736 struct rumpfs_node *rn;
1737 int error;
1738
1739 rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);
1740
1741 rn = makeprivate(VDIR, RUMPFS_DEFAULTMODE, NODEV, DEV_BSIZE, false);
1742 rn->rn_parent = rn;
1743 if ((error = makevnode(mp, rn, &rfsmp->rfsmp_rvp)) != 0)
1744 return error;
1745
1746 rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
1747
1748 mp->mnt_data = rfsmp;
1749 mp->mnt_stat.f_namemax = RUMPFS_MAXNAMLEN;
1750 mp->mnt_stat.f_iosize = 512;
1751 mp->mnt_flag |= MNT_LOCAL;
1752 mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO;
1753 mp->mnt_fs_bshift = DEV_BSHIFT;
1754 vfs_getnewfsid(mp);
1755
1756 return 0;
1757 }
1758
1759 int
1760 rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
1761 {
1762 int error;
1763
1764 if (mp->mnt_flag & MNT_UPDATE) {
1765 return 0;
1766 }
1767
1768 error = set_statvfs_info(mntpath, UIO_USERSPACE, "rumpfs", UIO_SYSSPACE,
1769 mp->mnt_op->vfs_name, mp, curlwp);
1770 if (error)
1771 return error;
1772
1773 return rumpfs_mountfs(mp);
1774 }
1775
1776 int
1777 rumpfs_unmount(struct mount *mp, int mntflags)
1778 {
1779 struct rumpfs_mount *rfsmp = mp->mnt_data;
1780 int flags = 0, error;
1781
1782 if (panicstr || mntflags & MNT_FORCE)
1783 flags |= FORCECLOSE;
1784
1785 if (rfsmp->rfsmp_rvp->v_usecount > 1 && (flags & FORCECLOSE) == 0)
1786 return EBUSY;
1787
1788 if ((error = vflush(mp, rfsmp->rfsmp_rvp, flags)) != 0)
1789 return error;
1790 vgone(rfsmp->rfsmp_rvp);
1791
1792 kmem_free(rfsmp, sizeof(*rfsmp));
1793
1794 return 0;
1795 }
1796
1797 int
1798 rumpfs_root(struct mount *mp, struct vnode **vpp)
1799 {
1800 struct rumpfs_mount *rfsmp = mp->mnt_data;
1801
1802 vref(rfsmp->rfsmp_rvp);
1803 vn_lock(rfsmp->rfsmp_rvp, LK_EXCLUSIVE | LK_RETRY);
1804 *vpp = rfsmp->rfsmp_rvp;
1805 return 0;
1806 }
1807
/*
 * VFS_VGET: lookup of a vnode by inode number is not supported.
 * All arguments are ignored; always returns EOPNOTSUPP.
 */
int
rumpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	const int error = EOPNOTSUPP;

	(void)mp;
	(void)ino;
	(void)vpp;

	return error;
}
1814
1815 void
1816 rumpfs_init()
1817 {
1818 extern rump_etfs_register_withsize_fn rump__etfs_register;
1819 extern rump_etfs_remove_fn rump__etfs_remove;
1820
1821 CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);
1822
1823 mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
1824 mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
1825
1826 rump__etfs_register = etfsregister;
1827 rump__etfs_remove = etfsremove;
1828 }
1829
1830 void
1831 rumpfs_done()
1832 {
1833
1834 mutex_destroy(&reclock);
1835 mutex_destroy(&etfs_lock);
1836 }
1837
1838 int
1839 rumpfs_mountroot()
1840 {
1841 struct mount *mp;
1842 int error;
1843
1844 if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
1845 vrele(rootvp);
1846 return error;
1847 }
1848
1849 if ((error = rumpfs_mountfs(mp)) != 0)
1850 panic("mounting rootfs failed: %d", error);
1851
1852 mountlist_append(mp);
1853
1854 error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
1855 mp->mnt_op->vfs_name, mp, curlwp);
1856 if (error)
1857 panic("set_statvfs_info failed for rootfs: %d", error);
1858
1859 mp->mnt_flag &= ~MNT_RDONLY;
1860 vfs_unbusy(mp, false, NULL);
1861
1862 return 0;
1863 }
1864