      1 /*	$NetBSD: rumpfs.c,v 1.119 2013/08/05 11:14:00 pooka Exp $	*/
2
3 /*
4 * Copyright (c) 2009, 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.119 2013/08/05 11:14:00 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/atomic.h>
33 #include <sys/buf.h>
34 #include <sys/dirent.h>
35 #include <sys/errno.h>
36 #include <sys/filedesc.h>
37 #include <sys/fcntl.h>
38 #include <sys/kauth.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/lock.h>
44 #include <sys/lockf.h>
45 #include <sys/queue.h>
46 #include <sys/stat.h>
47 #include <sys/syscallargs.h>
48 #include <sys/vnode.h>
49 #include <sys/unistd.h>
50
51 #include <miscfs/fifofs/fifo.h>
52 #include <miscfs/specfs/specdev.h>
53 #include <miscfs/genfs/genfs.h>
54 #include <miscfs/genfs/genfs_node.h>
55
56 #include <uvm/uvm_extern.h>
57
58 #include <rump/rumpuser.h>
59
60 #include "rump_private.h"
61 #include "rump_vfs_private.h"
62
63 static int rump_vop_lookup(void *);
64 static int rump_vop_getattr(void *);
65 static int rump_vop_setattr(void *);
66 static int rump_vop_mkdir(void *);
67 static int rump_vop_rmdir(void *);
68 static int rump_vop_remove(void *);
69 static int rump_vop_mknod(void *);
70 static int rump_vop_create(void *);
71 static int rump_vop_inactive(void *);
72 static int rump_vop_reclaim(void *);
73 static int rump_vop_success(void *);
74 static int rump_vop_readdir(void *);
75 static int rump_vop_spec(void *);
76 static int rump_vop_read(void *);
77 static int rump_vop_write(void *);
78 static int rump_vop_open(void *);
79 static int rump_vop_symlink(void *);
80 static int rump_vop_readlink(void *);
81 static int rump_vop_whiteout(void *);
82 static int rump_vop_pathconf(void *);
83 static int rump_vop_bmap(void *);
84 static int rump_vop_strategy(void *);
85 static int rump_vop_advlock(void *);
86 static int rump_vop_access(void *);
87
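        /*
         * A minimal stand-in vector for fifo_vnodeop_p: only the default
         * error routine and a null putpages are provided.  (Presumably this
         * lets the symbol resolve even when a full fifofs is not part of
         * the rump kernel; treat that rationale as an assumption.)
         */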
88 int (**fifo_vnodeop_p)(void *);
89 const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
90 { &vop_default_desc, vn_default_error },
91 { &vop_putpages_desc, genfs_null_putpages },
92 { NULL, NULL }
93 };
94 const struct vnodeopv_desc fifo_vnodeop_opv_desc =
95 { &fifo_vnodeop_p, fifo_vnodeop_entries };
96
97 int (**rump_vnodeop_p)(void *);
98 const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
99 { &vop_default_desc, vn_default_error },
100 { &vop_lookup_desc, rump_vop_lookup },
101 { &vop_getattr_desc, rump_vop_getattr },
102 { &vop_setattr_desc, rump_vop_setattr },
103 { &vop_mkdir_desc, rump_vop_mkdir },
104 { &vop_rmdir_desc, rump_vop_rmdir },
105 { &vop_remove_desc, rump_vop_remove },
106 { &vop_mknod_desc, rump_vop_mknod },
107 { &vop_create_desc, rump_vop_create },
108 { &vop_symlink_desc, rump_vop_symlink },
109 { &vop_readlink_desc, rump_vop_readlink },
110 { &vop_access_desc, rump_vop_access },
111 { &vop_readdir_desc, rump_vop_readdir },
112 { &vop_read_desc, rump_vop_read },
113 { &vop_write_desc, rump_vop_write },
114 { &vop_open_desc, rump_vop_open },
115 { &vop_close_desc, genfs_nullop },
116 { &vop_seek_desc, genfs_seek },
117 { &vop_getpages_desc, genfs_getpages },
118 { &vop_putpages_desc, genfs_putpages },
119 { &vop_whiteout_desc, rump_vop_whiteout },
120 { &vop_fsync_desc, rump_vop_success },
121 { &vop_lock_desc, genfs_lock },
122 { &vop_unlock_desc, genfs_unlock },
123 { &vop_islocked_desc, genfs_islocked },
124 { &vop_inactive_desc, rump_vop_inactive },
125 { &vop_reclaim_desc, rump_vop_reclaim },
126 { &vop_link_desc, genfs_eopnotsupp },
127 { &vop_pathconf_desc, rump_vop_pathconf },
128 { &vop_bmap_desc, rump_vop_bmap },
129 { &vop_strategy_desc, rump_vop_strategy },
130 { &vop_advlock_desc, rump_vop_advlock },
131 { NULL, NULL }
132 };
133 const struct vnodeopv_desc rump_vnodeop_opv_desc =
134 { &rump_vnodeop_p, rump_vnodeop_entries };
135
136 int (**rump_specop_p)(void *);
137 const struct vnodeopv_entry_desc rump_specop_entries[] = {
138 { &vop_default_desc, rump_vop_spec },
139 { NULL, NULL }
140 };
141 const struct vnodeopv_desc rump_specop_opv_desc =
142 { &rump_specop_p, rump_specop_entries };
143
144 const struct vnodeopv_desc * const rump_opv_descs[] = {
145 &rump_vnodeop_opv_desc,
146 &rump_specop_opv_desc,
147 NULL
148 };
149
150 #define RUMPFS_WHITEOUT ((void *)-1)
151 #define RDENT_ISWHITEOUT(rdp) (rdp->rd_node == RUMPFS_WHITEOUT)
152 struct rumpfs_dent {
153 char *rd_name;
154 int rd_namelen;
155 struct rumpfs_node *rd_node;
156
157 LIST_ENTRY(rumpfs_dent) rd_entries;
158 };
159
160 struct genfs_ops rumpfs_genfsops = {
161 .gop_size = genfs_size,
162 .gop_write = genfs_gop_write,
163
164 /* optional */
165 .gop_alloc = NULL,
166 .gop_markupdate = NULL,
167 };
168
169 struct rumpfs_node {
170 struct genfs_node rn_gn;
171 struct vattr rn_va;
172 struct vnode *rn_vp;
173 char *rn_hostpath;
174 int rn_flags;
175 struct lockf *rn_lockf;
176
177 union {
178 struct { /* VREG */
179 int readfd;
180 int writefd;
181 uint64_t offset;
182 } reg;
183 struct {
184 void *data;
185 size_t dlen;
186 } reg_noet;
187 struct { /* VDIR */
188 LIST_HEAD(, rumpfs_dent) dents;
189 struct rumpfs_node *parent;
190 int flags;
191 } dir;
192 struct {
193 char *target;
194 size_t len;
195 } link;
196 } rn_u;
197 };
198 #define rn_readfd rn_u.reg.readfd
199 #define rn_writefd rn_u.reg.writefd
200 #define rn_offset rn_u.reg.offset
201 #define rn_data rn_u.reg_noet.data
202 #define rn_dlen rn_u.reg_noet.dlen
203 #define rn_dir rn_u.dir.dents
204 #define rn_parent rn_u.dir.parent
205 #define rn_linktarg rn_u.link.target
206 #define rn_linklen rn_u.link.len
207
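        /*
         * rn_flags values, as used below:
         *   RUMPNODE_CANRECLAIM:    node memory may be freed once the
         *                           vnode is reclaimed
         *   RUMPNODE_DIR_ET:        directory is backed by a host directory
         *   RUMPNODE_DIR_ETSUBS:    subdirectories are host-backed as well
         *   RUMPNODE_ET_PHONE_HOST: I/O on the node goes to the host file
         */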
208 #define RUMPNODE_CANRECLAIM 0x01
209 #define RUMPNODE_DIR_ET 0x02
210 #define RUMPNODE_DIR_ETSUBS 0x04
211 #define RUMPNODE_ET_PHONE_HOST 0x10
212
213 struct rumpfs_mount {
214 struct vnode *rfsmp_rvp;
215 };
216
217 #define INO_WHITEOUT 1
218 static int lastino = 2;
219 static kmutex_t reclock;
220
221 #define RUMPFS_DEFAULTMODE 0755
222 static void freedir(struct rumpfs_node *, struct componentname *);
223 static struct rumpfs_node *makeprivate(enum vtype, mode_t, dev_t, off_t, bool);
224
225 /*
226 * Extra Terrestrial stuff. We map a given key (pathname) to a file on
227 * the host FS. ET phones home only from the root node of rumpfs.
228 *
229 * When an etfs node is removed, a vnode potentially behind it is not
230 * immediately recycled.
231 */
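        /*
         * Illustrative sketch only (not part of this file): a rump client
         * would normally establish and tear down a mapping through the
         * public interface, roughly along the lines of
         *
         *	rump_init();
         *	rump_pub_etfs_register("/dev/harddisk", "./disk.img",
         *	    RUMP_ETFS_BLK);
         *	... access /dev/harddisk with rump_sys_open() & co ...
         *	rump_pub_etfs_remove("/dev/harddisk");
         *
         * The rump_pub_*() names are assumed here to be the client-side
         * wrappers of the rump_etfs_*() routines defined below.
         */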
232
233 struct etfs {
234 char et_key[MAXPATHLEN];
235 size_t et_keylen;
236 bool et_prefixkey;
237 bool et_removing;
238 devminor_t et_blkmin;
239
240 LIST_ENTRY(etfs) et_entries;
241
242 struct rumpfs_node *et_rn;
243 };
244 static kmutex_t etfs_lock;
245 static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);
246
247 static enum vtype
248 ettype_to_vtype(enum rump_etfs_type et)
249 {
250 enum vtype vt;
251
252 switch (et) {
253 case RUMP_ETFS_REG:
254 vt = VREG;
255 break;
256 case RUMP_ETFS_BLK:
257 vt = VBLK;
258 break;
259 case RUMP_ETFS_CHR:
260 vt = VCHR;
261 break;
262 case RUMP_ETFS_DIR:
263 vt = VDIR;
264 break;
265 case RUMP_ETFS_DIR_SUBDIRS:
266 vt = VDIR;
267 break;
268 default:
269 panic("invalid et type: %d", et);
270 }
271
272 return vt;
273 }
274
275 static enum vtype
276 hft_to_vtype(int hft)
277 {
278 enum vtype vt;
279
280 switch (hft) {
281 case RUMPUSER_FT_OTHER:
282 vt = VNON;
283 break;
284 case RUMPUSER_FT_DIR:
285 vt = VDIR;
286 break;
287 case RUMPUSER_FT_REG:
288 vt = VREG;
289 break;
290 case RUMPUSER_FT_BLK:
291 vt = VBLK;
292 break;
293 case RUMPUSER_FT_CHR:
294 vt = VCHR;
295 break;
296 default:
297 vt = VNON;
298 break;
299 }
300
301 return vt;
302 }
303
304 static bool
305 etfs_find(const char *key, struct etfs **etp, bool forceprefix)
306 {
307 struct etfs *et;
308 size_t keylen = strlen(key);
309
310 KASSERT(mutex_owned(&etfs_lock));
311
312 LIST_FOREACH(et, &etfs_list, et_entries) {
313 if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
314 && strncmp(key, et->et_key, et->et_keylen) == 0) {
315 if (etp)
316 *etp = et;
317 return true;
318 }
319 }
320
321 return false;
322 }
323
324 #define REGDIR(ftype) \
325 ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
326 static int
327 doregister(const char *key, const char *hostpath,
328 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
329 {
330 char buf[9];
331 struct etfs *et;
332 struct rumpfs_node *rn;
333 uint64_t fsize;
334 dev_t rdev = NODEV;
335 devminor_t dmin = -1;
336 int hft, error;
337
338 if (key[0] != '/') {
339 return EINVAL;
340 }
341 while (key[0] == '/') {
342 key++;
343 }
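        	/*
        	 * The key is stored without its leading slashes; lookups from
        	 * the rumpfs root compare the remaining path component against
        	 * this stored form (see etfs_find() in rump_vop_lookup()).
        	 */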
344
345 if ((error = rumpuser_getfileinfo(hostpath, &fsize, &hft)) != 0)
346 return error;
347
348 /* etfs directory requires a directory on the host */
349 if (REGDIR(ftype)) {
350 if (hft != RUMPUSER_FT_DIR)
351 return ENOTDIR;
352 if (begin != 0)
353 return EISDIR;
354 if (size != RUMP_ETFS_SIZE_ENDOFF)
355 return EISDIR;
356 size = fsize;
357 } else {
358 if (begin > fsize)
359 return EINVAL;
360 if (size == RUMP_ETFS_SIZE_ENDOFF)
361 size = fsize - begin;
362 if (begin + size > fsize)
363 return EINVAL;
364 }
365
366 if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
367 error = rumpblk_register(hostpath, &dmin, begin, size);
368 if (error != 0) {
369 return error;
370 }
371 rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
372 }
373
374 et = kmem_alloc(sizeof(*et), KM_SLEEP);
375 strcpy(et->et_key, key);
376 et->et_keylen = strlen(et->et_key);
377 et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), RUMPFS_DEFAULTMODE,
378 rdev, size, true);
379 et->et_removing = false;
380 et->et_blkmin = dmin;
381
382 rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
383
384 if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
385 size_t len = strlen(hostpath)+1;
386
387 rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
388 memcpy(rn->rn_hostpath, hostpath, len);
389 rn->rn_offset = begin;
390 }
391
392 if (REGDIR(ftype)) {
393 rn->rn_flags |= RUMPNODE_DIR_ET;
394 et->et_prefixkey = true;
395 } else {
396 et->et_prefixkey = false;
397 }
398
399 if (ftype == RUMP_ETFS_DIR_SUBDIRS)
400 rn->rn_flags |= RUMPNODE_DIR_ETSUBS;
401
402 mutex_enter(&etfs_lock);
403 if (etfs_find(key, NULL, REGDIR(ftype))) {
404 mutex_exit(&etfs_lock);
405 if (et->et_blkmin != -1)
406 rumpblk_deregister(hostpath);
407 if (et->et_rn->rn_hostpath != NULL)
408 free(et->et_rn->rn_hostpath, M_TEMP);
409 kmem_free(et->et_rn, sizeof(*et->et_rn));
410 kmem_free(et, sizeof(*et));
411 return EEXIST;
412 }
413 LIST_INSERT_HEAD(&etfs_list, et, et_entries);
414 mutex_exit(&etfs_lock);
415
416 if (ftype == RUMP_ETFS_BLK) {
417 format_bytes(buf, sizeof(buf), size);
418 aprint_verbose("/%s: hostpath %s (%s)\n", key, hostpath, buf);
419 }
420
421 return 0;
422 }
423 #undef REGDIR
424
425 int
426 rump_etfs_register(const char *key, const char *hostpath,
427 enum rump_etfs_type ftype)
428 {
429
430 return doregister(key, hostpath, ftype, 0, RUMP_ETFS_SIZE_ENDOFF);
431 }
432
433 int
434 rump_etfs_register_withsize(const char *key, const char *hostpath,
435 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
436 {
437
438 return doregister(key, hostpath, ftype, begin, size);
439 }
440
441 /* remove etfs mapping. caller's responsibility to make sure it's not in use */
442 int
443 rump_etfs_remove(const char *key)
444 {
445 struct etfs *et;
446 size_t keylen;
447 int rv;
448
449 if (key[0] != '/') {
450 return EINVAL;
451 }
452 while (key[0] == '/') {
453 key++;
454 }
455
456 keylen = strlen(key);
457
458 mutex_enter(&etfs_lock);
459 LIST_FOREACH(et, &etfs_list, et_entries) {
460 if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
461 if (et->et_removing)
462 et = NULL;
463 else
464 et->et_removing = true;
465 break;
466 }
467 }
468 mutex_exit(&etfs_lock);
469 if (!et)
470 return ENOENT;
471
472 /*
473 * ok, we know what we want to remove and have signalled there
474 * actually are men at work. first, unregister from rumpblk
475 */
476 if (et->et_blkmin != -1) {
477 rv = rumpblk_deregister(et->et_rn->rn_hostpath);
478 } else {
479 rv = 0;
480 }
481 KASSERT(rv == 0);
482
483 /* then do the actual removal */
484 mutex_enter(&etfs_lock);
485 LIST_REMOVE(et, et_entries);
486 mutex_exit(&etfs_lock);
487
488 /* node is unreachable, safe to nuke all device copies */
489 if (et->et_blkmin != -1) {
490 vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);
491 } else {
492 struct vnode *vp;
493
494 mutex_enter(&reclock);
495 if ((vp = et->et_rn->rn_vp) != NULL)
496 mutex_enter(vp->v_interlock);
497 mutex_exit(&reclock);
498 if (vp && vget(vp, 0) == 0)
499 vgone(vp);
500 }
501
502 if (et->et_rn->rn_hostpath != NULL)
503 free(et->et_rn->rn_hostpath, M_TEMP);
504 kmem_free(et->et_rn, sizeof(*et->et_rn));
505 kmem_free(et, sizeof(*et));
506
507 return 0;
508 }
509
510 /*
511 * rumpfs
512 */
513
514 static struct rumpfs_node *
515 makeprivate(enum vtype vt, mode_t mode, dev_t rdev, off_t size, bool et)
516 {
517 struct rumpfs_node *rn;
518 struct vattr *va;
519 struct timespec ts;
520
521 KASSERT((mode & ~ALLPERMS) == 0);
522 rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);
523
524 switch (vt) {
525 case VDIR:
526 LIST_INIT(&rn->rn_dir);
527 break;
528 case VREG:
529 if (et) {
530 rn->rn_readfd = -1;
531 rn->rn_writefd = -1;
532 }
533 break;
534 default:
535 break;
536 }
537
538 nanotime(&ts);
539
540 va = &rn->rn_va;
541 va->va_type = vt;
542 va->va_mode = mode;
543 if (vt == VDIR)
544 va->va_nlink = 2;
545 else
546 va->va_nlink = 1;
547 va->va_uid = 0;
548 va->va_gid = 0;
549 va->va_fsid =
550 va->va_fileid = atomic_inc_uint_nv(&lastino);
551 va->va_size = size;
552 va->va_blocksize = 512;
553 va->va_atime = ts;
554 va->va_mtime = ts;
555 va->va_ctime = ts;
556 va->va_birthtime = ts;
557 va->va_gen = 0;
558 va->va_flags = 0;
559 va->va_rdev = rdev;
560 va->va_bytes = 512;
561 va->va_filerev = 0;
562 va->va_vaflags = 0;
563
564 return rn;
565 }
566
567 static int
568 makevnode(struct mount *mp, struct rumpfs_node *rn, struct vnode **vpp)
569 {
570 struct vnode *vp;
571 int (**vpops)(void *);
572 struct vattr *va = &rn->rn_va;
573 int rv;
574
575 KASSERT(!mutex_owned(&reclock));
576
577 if (va->va_type == VCHR || va->va_type == VBLK) {
578 vpops = rump_specop_p;
579 } else {
580 vpops = rump_vnodeop_p;
581 }
582
583 rv = getnewvnode(VT_RUMP, mp, vpops, NULL, &vp);
584 if (rv)
585 return rv;
586
587 vp->v_size = vp->v_writesize = va->va_size;
588 vp->v_type = va->va_type;
589
590 if (vpops == rump_specop_p) {
591 spec_node_init(vp, va->va_rdev);
592 }
593 vp->v_data = rn;
594
595 genfs_node_init(vp, &rumpfs_genfsops);
596 vn_lock(vp, LK_RETRY | LK_EXCLUSIVE);
597 mutex_enter(&reclock);
598 rn->rn_vp = vp;
599 mutex_exit(&reclock);
600
601 *vpp = vp;
602
603 return 0;
604 }
605
606
607 static void
608 makedir(struct rumpfs_node *rnd,
609 struct componentname *cnp, struct rumpfs_node *rn)
610 {
611 struct rumpfs_dent *rdent;
612
613 rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
614 rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
615 rdent->rd_node = rn;
616 strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
617 rdent->rd_namelen = strlen(rdent->rd_name);
618
619 if ((cnp->cn_flags & ISWHITEOUT) != 0) {
620 KASSERT((cnp->cn_flags & DOWHITEOUT) == 0);
621 freedir(rnd, cnp);
622 }
623 LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
624 }
625
626 static void
627 freedir(struct rumpfs_node *rnd, struct componentname *cnp)
628 {
629 struct rumpfs_dent *rd = NULL;
630
631 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
632 if (rd->rd_namelen == cnp->cn_namelen &&
633 strncmp(rd->rd_name, cnp->cn_nameptr,
634 cnp->cn_namelen) == 0)
635 break;
636 }
637 if (rd == NULL)
638 panic("could not find directory entry: %s", cnp->cn_nameptr);
639
640 if (cnp->cn_flags & DOWHITEOUT) {
641 rd->rd_node = RUMPFS_WHITEOUT;
642 } else {
643 LIST_REMOVE(rd, rd_entries);
644 kmem_free(rd->rd_name, rd->rd_namelen+1);
645 kmem_free(rd, sizeof(*rd));
646 }
647 }
648
649 /*
650 * Simple lookup for rump file systems.
651 *
652 * uhm, this is twisted. C F C C, hope of C C F C looming
653 */
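        /*
         * Resolution order, as implemented below: "." is handled first,
         * then etfs keys are consulted (only when the parent is the root
         * vnode), then host-backed (RUMPNODE_DIR_ET) directories query the
         * host via rumpuser_getfileinfo(), and finally the in-memory dirent
         * list of a regular rumpfs directory is scanned.
         */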
654 static int
655 rump_vop_lookup(void *v)
656 {
657 struct vop_lookup_args /* {
658 struct vnode *a_dvp;
659 struct vnode **a_vpp;
660 struct componentname *a_cnp;
661 }; */ *ap = v;
662 struct componentname *cnp = ap->a_cnp;
663 struct vnode *dvp = ap->a_dvp;
664 struct vnode **vpp = ap->a_vpp;
665 struct vnode *vp;
666 struct rumpfs_node *rnd = dvp->v_data, *rn;
667 struct rumpfs_dent *rd = NULL;
668 struct etfs *et;
669 bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
670 int rv = 0;
671 const char *cp;
672
673 *vpp = NULL;
674
675 rv = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
676 if (rv)
677 return rv;
678
679 if ((cnp->cn_flags & ISLASTCN)
680 && (dvp->v_mount->mnt_flag & MNT_RDONLY)
681 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
682 return EROFS;
683
    684 	/* check for dot; return the directory itself in that case */
685 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
686 vref(dvp);
687 *vpp = dvp;
688 return 0;
689 }
690
691 /* we don't do rename */
692 if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
693 return EOPNOTSUPP;
694
695 /* check for etfs */
696 if (dvp == rootvnode &&
697 (cnp->cn_nameiop == LOOKUP || cnp->cn_nameiop == CREATE)) {
698 bool found;
699 mutex_enter(&etfs_lock);
700 found = etfs_find(cnp->cn_nameptr, &et, false);
701 mutex_exit(&etfs_lock);
702
703 if (found) {
704 rn = et->et_rn;
705 cnp->cn_consume += et->et_keylen - cnp->cn_namelen;
706 /*
707 * consume trailing slashes if any and clear
708 * REQUIREDIR if we consumed the full path.
709 */
710 cp = &cnp->cn_nameptr[cnp->cn_namelen];
711 cp += cnp->cn_consume;
712 KASSERT(*cp == '\0' || *cp == '/');
713 if (*cp == '\0' && rn->rn_va.va_type != VDIR)
714 cnp->cn_flags &= ~REQUIREDIR;
715 while (*cp++ == '/')
716 cnp->cn_consume++;
717 goto getvnode;
718 }
719 }
720
721 if (rnd->rn_flags & RUMPNODE_DIR_ET) {
722 uint64_t fsize;
723 char *newpath;
724 size_t newpathlen;
725 int hft, error;
726
727 if (dotdot)
728 return EOPNOTSUPP;
729
730 newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
731 newpath = malloc(newpathlen, M_TEMP, M_WAITOK);
732
733 strlcpy(newpath, rnd->rn_hostpath, newpathlen);
734 strlcat(newpath, "/", newpathlen);
735 strlcat(newpath, cnp->cn_nameptr, newpathlen);
736
737 if ((error = rumpuser_getfileinfo(newpath, &fsize, &hft)) != 0){
738 free(newpath, M_TEMP);
739 return error;
740 }
741
742 /* allow only dirs and regular files */
743 if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
744 free(newpath, M_TEMP);
745 return ENOENT;
746 }
747
748 rn = makeprivate(hft_to_vtype(hft), RUMPFS_DEFAULTMODE,
749 NODEV, fsize, true);
750 rn->rn_flags |= RUMPNODE_CANRECLAIM;
751 if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
752 rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
753 rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
754 }
755 rn->rn_hostpath = newpath;
756
757 goto getvnode;
758 } else {
759 if (dotdot) {
760 if ((rn = rnd->rn_parent) != NULL)
761 goto getvnode;
762 } else {
763 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
764 if (rd->rd_namelen == cnp->cn_namelen &&
765 strncmp(rd->rd_name, cnp->cn_nameptr,
766 cnp->cn_namelen) == 0)
767 break;
768 }
769 }
770 }
771
772 if (!rd && ((cnp->cn_flags & ISLASTCN) == 0||cnp->cn_nameiop != CREATE))
773 return ENOENT;
774
775 if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
776 if (dvp->v_mount->mnt_flag & MNT_RDONLY)
777 return EROFS;
778 rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
779 if (rv)
780 return rv;
781 return EJUSTRETURN;
782 }
783
784 if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == DELETE) {
785 rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
786 if (rv)
787 return rv;
788 }
789
790 if (RDENT_ISWHITEOUT(rd)) {
791 cnp->cn_flags |= ISWHITEOUT;
792 if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE)
793 return EJUSTRETURN;
794 return ENOENT;
795 }
796
797 rn = rd->rd_node;
798
799 getvnode:
800 KASSERT(rn);
801 if (dotdot)
802 VOP_UNLOCK(dvp);
803 mutex_enter(&reclock);
804 if ((vp = rn->rn_vp)) {
805 mutex_enter(vp->v_interlock);
806 mutex_exit(&reclock);
807 if (vget(vp, LK_EXCLUSIVE)) {
808 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
809 goto getvnode;
810 }
811 *vpp = vp;
812 } else {
813 mutex_exit(&reclock);
814 rv = makevnode(dvp->v_mount, rn, vpp);
815 }
816 if (dotdot)
817 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
818
819 return rv;
820 }
821
822 static int
823 rump_check_possible(struct vnode *vp, struct rumpfs_node *rnode,
824 mode_t mode)
825 {
826
827 if ((mode & VWRITE) == 0)
828 return 0;
829
830 switch (vp->v_type) {
831 case VDIR:
832 case VLNK:
833 case VREG:
834 break;
835 default:
836 /* special file is always writable. */
837 return 0;
838 }
839
840 return vp->v_mount->mnt_flag & MNT_RDONLY ? EROFS : 0;
841 }
842
843 static int
844 rump_check_permitted(struct vnode *vp, struct rumpfs_node *rnode,
845 mode_t mode, kauth_cred_t cred)
846 {
847 struct vattr *attr = &rnode->rn_va;
848
849 return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
850 vp->v_type, attr->va_mode), vp, NULL, genfs_can_access(vp->v_type,
851 attr->va_mode, attr->va_uid, attr->va_gid, mode, cred));
852 }
853
854 int
855 rump_vop_access(void *v)
856 {
857 struct vop_access_args /* {
858 const struct vnodeop_desc *a_desc;
859 struct vnode *a_vp;
860 int a_mode;
861 kauth_cred_t a_cred;
862 } */ *ap = v;
863 struct vnode *vp = ap->a_vp;
864 struct rumpfs_node *rn = vp->v_data;
865 int error;
866
867 error = rump_check_possible(vp, rn, ap->a_mode);
868 if (error)
869 return error;
870
871 error = rump_check_permitted(vp, rn, ap->a_mode, ap->a_cred);
872
873 return error;
874 }
875
876 static int
877 rump_vop_getattr(void *v)
878 {
879 struct vop_getattr_args /* {
880 struct vnode *a_vp;
881 struct vattr *a_vap;
882 kauth_cred_t a_cred;
883 } */ *ap = v;
884 struct vnode *vp = ap->a_vp;
885 struct rumpfs_node *rn = vp->v_data;
886 struct vattr *vap = ap->a_vap;
887
888 memcpy(vap, &rn->rn_va, sizeof(struct vattr));
889 vap->va_size = vp->v_size;
890 return 0;
891 }
892
893 static int
894 rump_vop_setattr(void *v)
895 {
896 struct vop_setattr_args /* {
897 struct vnode *a_vp;
898 struct vattr *a_vap;
899 kauth_cred_t a_cred;
900 } */ *ap = v;
901 struct vnode *vp = ap->a_vp;
902 struct vattr *vap = ap->a_vap;
903 struct rumpfs_node *rn = vp->v_data;
904 struct vattr *attr = &rn->rn_va;
905 kauth_cred_t cred = ap->a_cred;
906 int error;
907
908 #define CHANGED(a, t) (vap->a != (t)VNOVAL)
909 #define SETIFVAL(a,t) if (CHANGED(a, t)) rn->rn_va.a = vap->a
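        	/*
        	 * i.e. copy a field from the caller's vattr only when it is
        	 * not VNOVAL, meaning the caller actually requested a change.
        	 */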
910 if (CHANGED(va_atime.tv_sec, time_t) ||
911 CHANGED(va_ctime.tv_sec, time_t) ||
912 CHANGED(va_mtime.tv_sec, time_t) ||
913 CHANGED(va_birthtime.tv_sec, time_t) ||
914 CHANGED(va_atime.tv_nsec, long) ||
915 CHANGED(va_ctime.tv_nsec, long) ||
916 CHANGED(va_mtime.tv_nsec, long) ||
917 CHANGED(va_birthtime.tv_nsec, long)) {
918 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp,
919 NULL, genfs_can_chtimes(vp, vap->va_vaflags, attr->va_uid,
920 cred));
921 if (error)
922 return error;
923 }
924
925 SETIFVAL(va_atime.tv_sec, time_t);
926 SETIFVAL(va_ctime.tv_sec, time_t);
927 SETIFVAL(va_mtime.tv_sec, time_t);
928 SETIFVAL(va_birthtime.tv_sec, time_t);
929 SETIFVAL(va_atime.tv_nsec, long);
930 SETIFVAL(va_ctime.tv_nsec, long);
931 SETIFVAL(va_mtime.tv_nsec, long);
932 SETIFVAL(va_birthtime.tv_nsec, long);
933
934 if (CHANGED(va_flags, u_long)) {
935 /* XXX Can we handle system flags here...? */
936 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_FLAGS, vp,
937 NULL, genfs_can_chflags(cred, vp->v_type, attr->va_uid,
938 false));
939 if (error)
940 return error;
941 }
942
943 SETIFVAL(va_flags, u_long);
944 #undef SETIFVAL
945 #undef CHANGED
946
947 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (uid_t)VNOVAL) {
948 uid_t uid =
949 (vap->va_uid != (uid_t)VNOVAL) ? vap->va_uid : attr->va_uid;
950 gid_t gid =
951 (vap->va_gid != (gid_t)VNOVAL) ? vap->va_gid : attr->va_gid;
952 error = kauth_authorize_vnode(cred,
953 KAUTH_VNODE_CHANGE_OWNERSHIP, vp, NULL,
954 genfs_can_chown(cred, attr->va_uid, attr->va_gid, uid,
955 gid));
956 if (error)
957 return error;
958 attr->va_uid = uid;
959 attr->va_gid = gid;
960 }
961
962 if (vap->va_mode != (mode_t)VNOVAL) {
963 mode_t mode = vap->va_mode;
964 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY,
965 vp, NULL, genfs_can_chmod(vp->v_type, cred, attr->va_uid,
966 attr->va_gid, mode));
967 if (error)
968 return error;
969 attr->va_mode = mode;
970 }
971
972 if (vp->v_type == VREG &&
973 vap->va_size != VSIZENOTSET &&
974 vap->va_size != rn->rn_dlen) {
975 void *newdata;
976 size_t copylen, newlen;
977
978 newlen = vap->va_size;
979 newdata = rump_hypermalloc(newlen, 0, true, "rumpfs");
980
981 copylen = MIN(rn->rn_dlen, newlen);
982 memset(newdata, 0, newlen);
983 memcpy(newdata, rn->rn_data, copylen);
984 rump_hyperfree(rn->rn_data, rn->rn_dlen);
985
986 rn->rn_data = newdata;
987 rn->rn_dlen = newlen;
988 uvm_vnp_setsize(vp, newlen);
989 }
990 return 0;
991 }
992
993 static int
994 rump_vop_mkdir(void *v)
995 {
996 struct vop_mkdir_args /* {
997 struct vnode *a_dvp;
998 struct vnode **a_vpp;
999 struct componentname *a_cnp;
1000 struct vattr *a_vap;
1001 }; */ *ap = v;
1002 struct vnode *dvp = ap->a_dvp;
1003 struct vnode **vpp = ap->a_vpp;
1004 struct componentname *cnp = ap->a_cnp;
1005 struct vattr *va = ap->a_vap;
1006 struct rumpfs_node *rnd = dvp->v_data, *rn;
1007 int rv = 0;
1008
1009 rn = makeprivate(VDIR, va->va_mode & ALLPERMS, NODEV, DEV_BSIZE, false);
1010 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1011 rn->rn_va.va_flags |= UF_OPAQUE;
1012 rn->rn_parent = rnd;
1013 rv = makevnode(dvp->v_mount, rn, vpp);
1014 if (rv)
1015 goto out;
1016
1017 makedir(rnd, cnp, rn);
1018
1019 out:
1020 vput(dvp);
1021 return rv;
1022 }
1023
1024 static int
1025 rump_vop_rmdir(void *v)
1026 {
1027 struct vop_rmdir_args /* {
1028 struct vnode *a_dvp;
1029 struct vnode *a_vp;
1030 struct componentname *a_cnp;
1031 }; */ *ap = v;
1032 struct vnode *dvp = ap->a_dvp;
1033 struct vnode *vp = ap->a_vp;
1034 struct componentname *cnp = ap->a_cnp;
1035 struct rumpfs_node *rnd = dvp->v_data;
1036 struct rumpfs_node *rn = vp->v_data;
1037 struct rumpfs_dent *rd;
1038 int rv = 0;
1039
1040 LIST_FOREACH(rd, &rn->rn_dir, rd_entries) {
1041 if (rd->rd_node != RUMPFS_WHITEOUT) {
1042 rv = ENOTEMPTY;
1043 goto out;
1044 }
1045 }
1046 while ((rd = LIST_FIRST(&rn->rn_dir)) != NULL) {
1047 KASSERT(rd->rd_node == RUMPFS_WHITEOUT);
1048 LIST_REMOVE(rd, rd_entries);
1049 kmem_free(rd->rd_name, rd->rd_namelen+1);
1050 kmem_free(rd, sizeof(*rd));
1051 }
1052
1053 freedir(rnd, cnp);
1054 rn->rn_flags |= RUMPNODE_CANRECLAIM;
1055 rn->rn_parent = NULL;
1056
1057 out:
1058 vput(dvp);
1059 vput(vp);
1060
1061 return rv;
1062 }
1063
1064 static int
1065 rump_vop_remove(void *v)
1066 {
1067 struct vop_remove_args /* {
1068 struct vnode *a_dvp;
1069 struct vnode *a_vp;
1070 struct componentname *a_cnp;
1071 }; */ *ap = v;
1072 struct vnode *dvp = ap->a_dvp;
1073 struct vnode *vp = ap->a_vp;
1074 struct componentname *cnp = ap->a_cnp;
1075 struct rumpfs_node *rnd = dvp->v_data;
1076 struct rumpfs_node *rn = vp->v_data;
1077 int rv = 0;
1078
1079 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1080 return EOPNOTSUPP;
1081
1082 freedir(rnd, cnp);
1083 rn->rn_flags |= RUMPNODE_CANRECLAIM;
1084
1085 vput(dvp);
1086 vput(vp);
1087
1088 return rv;
1089 }
1090
1091 static int
1092 rump_vop_mknod(void *v)
1093 {
1094 struct vop_mknod_args /* {
1095 struct vnode *a_dvp;
1096 struct vnode **a_vpp;
1097 struct componentname *a_cnp;
1098 struct vattr *a_vap;
1099 }; */ *ap = v;
1100 struct vnode *dvp = ap->a_dvp;
1101 struct vnode **vpp = ap->a_vpp;
1102 struct componentname *cnp = ap->a_cnp;
1103 struct vattr *va = ap->a_vap;
1104 struct rumpfs_node *rnd = dvp->v_data, *rn;
1105 int rv;
1106
1107 rn = makeprivate(va->va_type, va->va_mode & ALLPERMS, va->va_rdev,
1108 DEV_BSIZE, false);
1109 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1110 rn->rn_va.va_flags |= UF_OPAQUE;
1111 rv = makevnode(dvp->v_mount, rn, vpp);
1112 if (rv)
1113 goto out;
1114
1115 makedir(rnd, cnp, rn);
1116
1117 out:
1118 vput(dvp);
1119 return rv;
1120 }
1121
1122 static int
1123 rump_vop_create(void *v)
1124 {
1125 struct vop_create_args /* {
1126 struct vnode *a_dvp;
1127 struct vnode **a_vpp;
1128 struct componentname *a_cnp;
1129 struct vattr *a_vap;
1130 }; */ *ap = v;
1131 struct vnode *dvp = ap->a_dvp;
1132 struct vnode **vpp = ap->a_vpp;
1133 struct componentname *cnp = ap->a_cnp;
1134 struct vattr *va = ap->a_vap;
1135 struct rumpfs_node *rnd = dvp->v_data, *rn;
1136 off_t newsize;
1137 int rv;
1138
1139 newsize = va->va_type == VSOCK ? DEV_BSIZE : 0;
1140 rn = makeprivate(va->va_type, va->va_mode & ALLPERMS, NODEV,
1141 newsize, false);
1142 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1143 rn->rn_va.va_flags |= UF_OPAQUE;
1144 rv = makevnode(dvp->v_mount, rn, vpp);
1145 if (rv)
1146 goto out;
1147
1148 makedir(rnd, cnp, rn);
1149
1150 out:
1151 vput(dvp);
1152 return rv;
1153 }
1154
1155 static int
1156 rump_vop_symlink(void *v)
1157 {
1158 struct vop_symlink_args /* {
1159 struct vnode *a_dvp;
1160 struct vnode **a_vpp;
1161 struct componentname *a_cnp;
1162 struct vattr *a_vap;
1163 char *a_target;
1164 }; */ *ap = v;
1165 struct vnode *dvp = ap->a_dvp;
1166 struct vnode **vpp = ap->a_vpp;
1167 struct componentname *cnp = ap->a_cnp;
1168 struct vattr *va = ap->a_vap;
1169 struct rumpfs_node *rnd = dvp->v_data, *rn;
1170 const char *target = ap->a_target;
1171 size_t linklen;
1172 int rv;
1173
1174 linklen = strlen(target);
1175 KASSERT(linklen < MAXPATHLEN);
1176 rn = makeprivate(VLNK, va->va_mode & ALLPERMS, NODEV, linklen, false);
1177 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1178 rn->rn_va.va_flags |= UF_OPAQUE;
1179 rv = makevnode(dvp->v_mount, rn, vpp);
1180 if (rv)
1181 goto out;
1182
1183 makedir(rnd, cnp, rn);
1184
1185 KASSERT(linklen < MAXPATHLEN);
1186 rn->rn_linktarg = PNBUF_GET();
1187 rn->rn_linklen = linklen;
1188 strcpy(rn->rn_linktarg, target);
1189
1190 out:
1191 vput(dvp);
1192 return rv;
1193 }
1194
1195 static int
1196 rump_vop_readlink(void *v)
1197 {
1198 struct vop_readlink_args /* {
1199 struct vnode *a_vp;
1200 struct uio *a_uio;
1201 kauth_cred_t a_cred;
1202 }; */ *ap = v;
1203 struct vnode *vp = ap->a_vp;
1204 struct rumpfs_node *rn = vp->v_data;
1205 struct uio *uio = ap->a_uio;
1206
1207 return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
1208 }
1209
1210 static int
1211 rump_vop_whiteout(void *v)
1212 {
1213 struct vop_whiteout_args /* {
1214 struct vnode *a_dvp;
1215 struct componentname *a_cnp;
1216 int a_flags;
1217 } */ *ap = v;
1218 struct vnode *dvp = ap->a_dvp;
1219 struct rumpfs_node *rnd = dvp->v_data;
1220 struct componentname *cnp = ap->a_cnp;
1221 int flags = ap->a_flags;
1222
1223 switch (flags) {
1224 case LOOKUP:
1225 break;
1226 case CREATE:
1227 makedir(rnd, cnp, RUMPFS_WHITEOUT);
1228 break;
1229 case DELETE:
1230 cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails ? */
1231 freedir(rnd, cnp);
1232 break;
1233 default:
1234 panic("unknown whiteout op %d", flags);
1235 }
1236
1237 return 0;
1238 }
1239
1240 static int
1241 rump_vop_open(void *v)
1242 {
1243 struct vop_open_args /* {
1244 struct vnode *a_vp;
1245 int a_mode;
1246 kauth_cred_t a_cred;
1247 } */ *ap = v;
1248 struct vnode *vp = ap->a_vp;
1249 struct rumpfs_node *rn = vp->v_data;
1250 int mode = ap->a_mode;
1251 int error = EINVAL;
1252
1253 if (vp->v_type != VREG || (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0)
1254 return 0;
1255
1256 if (mode & FREAD) {
1257 if (rn->rn_readfd != -1)
1258 return 0;
1259 error = rumpuser_open(rn->rn_hostpath,
1260 RUMPUSER_OPEN_RDONLY, &rn->rn_readfd);
1261 }
1262
1263 if (mode & FWRITE) {
1264 if (rn->rn_writefd != -1)
1265 return 0;
1266 error = rumpuser_open(rn->rn_hostpath,
1267 RUMPUSER_OPEN_WRONLY, &rn->rn_writefd);
1268 }
1269
1270 return error;
1271 }
1272
1273 /* simple readdir. even omits dotstuff and periods */
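        /*
         * Note that uio_offset is used as an index into the dirent list
         * rather than as a byte offset, and no seek cookies are generated
         * (*a_ncookies is always set to 0).
         */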
1274 static int
1275 rump_vop_readdir(void *v)
1276 {
1277 struct vop_readdir_args /* {
1278 struct vnode *a_vp;
1279 struct uio *a_uio;
1280 kauth_cred_t a_cred;
1281 int *a_eofflag;
1282 off_t **a_cookies;
1283 int *a_ncookies;
1284 } */ *ap = v;
1285 struct vnode *vp = ap->a_vp;
1286 struct uio *uio = ap->a_uio;
1287 struct rumpfs_node *rnd = vp->v_data;
1288 struct rumpfs_dent *rdent;
1289 struct dirent *dentp = NULL;
1290 unsigned i;
1291 int rv = 0;
1292
1293 /* seek to current entry */
1294 for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
1295 (i < uio->uio_offset) && rdent;
1296 i++, rdent = LIST_NEXT(rdent, rd_entries))
1297 continue;
1298 if (!rdent)
1299 goto out;
1300
1301 /* copy entries */
1302 dentp = kmem_alloc(sizeof(*dentp), KM_SLEEP);
1303 for (; rdent && uio->uio_resid > 0;
1304 rdent = LIST_NEXT(rdent, rd_entries), i++) {
1305 strlcpy(dentp->d_name, rdent->rd_name, sizeof(dentp->d_name));
1306 dentp->d_namlen = strlen(dentp->d_name);
1307 dentp->d_reclen = _DIRENT_RECLEN(dentp, dentp->d_namlen);
1308
1309 if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
1310 dentp->d_fileno = INO_WHITEOUT;
1311 dentp->d_type = DT_WHT;
1312 } else {
1313 dentp->d_fileno = rdent->rd_node->rn_va.va_fileid;
1314 dentp->d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
1315 }
1316
1317 if (uio->uio_resid < dentp->d_reclen) {
1318 i--;
1319 break;
1320 }
1321
1322 rv = uiomove(dentp, dentp->d_reclen, uio);
1323 if (rv) {
1324 i--;
1325 break;
1326 }
1327 }
1328 kmem_free(dentp, sizeof(*dentp));
1329 dentp = NULL;
1330
1331 out:
1332 KASSERT(dentp == NULL);
1333 if (ap->a_cookies) {
1334 *ap->a_ncookies = 0;
1335 *ap->a_cookies = NULL;
1336 }
1337 if (rdent)
1338 *ap->a_eofflag = 0;
1339 else
1340 *ap->a_eofflag = 1;
1341 uio->uio_offset = i;
1342
1343 return rv;
1344 }
1345
1346 static int
1347 etread(struct rumpfs_node *rn, struct uio *uio)
1348 {
1349 struct rumpuser_iovec iov;
1350 uint8_t *buf;
1351 size_t bufsize, n;
1352 int error = 0;
1353
1354 bufsize = uio->uio_resid;
1355 if (bufsize == 0)
1356 return 0;
1357 buf = kmem_alloc(bufsize, KM_SLEEP);
1358
1359 iov.iov_base = buf;
1360 iov.iov_len = bufsize;
1361 if ((error = rumpuser_iovread(rn->rn_readfd, &iov, 1,
1362 uio->uio_offset + rn->rn_offset, &n)) == 0) {
1363 KASSERT(n <= bufsize);
1364 error = uiomove(buf, n, uio);
1365 }
1366
1367 kmem_free(buf, bufsize);
1368 return error;
1369 }
1370
1371 static int
1372 rump_vop_read(void *v)
1373 {
1374 struct vop_read_args /* {
1375 struct vnode *a_vp;
1376 struct uio *a_uio;
   1377 		int a_ioflag;
1378 kauth_cred_t a_cred;
1379 }; */ *ap = v;
1380 struct vnode *vp = ap->a_vp;
1381 struct rumpfs_node *rn = vp->v_data;
1382 struct uio *uio = ap->a_uio;
1383 const int advice = IO_ADV_DECODE(ap->a_ioflag);
1384 off_t chunk;
1385 int error = 0;
1386
1387 if (vp->v_type == VDIR)
1388 return EISDIR;
1389
1390 /* et op? */
1391 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1392 return etread(rn, uio);
1393
1394 /* otherwise, it's off to ubc with us */
1395 while (uio->uio_resid > 0) {
1396 chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1397 if (chunk == 0)
1398 break;
1399 error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1400 UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1401 if (error)
1402 break;
1403 }
1404
1405 return error;
1406 }
1407
1408 static int
1409 etwrite(struct rumpfs_node *rn, struct uio *uio)
1410 {
1411 struct rumpuser_iovec iov;
1412 uint8_t *buf;
1413 size_t bufsize, n;
1414 int error = 0;
1415
1416 bufsize = uio->uio_resid;
1417 if (bufsize == 0)
1418 return 0;
1419 buf = kmem_alloc(bufsize, KM_SLEEP);
1420 error = uiomove(buf, bufsize, uio);
1421 if (error)
1422 goto out;
1423
1424 KASSERT(uio->uio_resid == 0);
1425 iov.iov_base = buf;
1426 iov.iov_len = bufsize;
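        	/*
        	 * uiomove() above already advanced uio_offset past this chunk,
        	 * so subtract bufsize to recover the file offset for the host
        	 * write below.
        	 */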
1427 if ((error = rumpuser_iovwrite(rn->rn_writefd, &iov, 1,
1428 (uio->uio_offset-bufsize) + rn->rn_offset, &n)) == 0) {
1429 KASSERT(n <= bufsize);
1430 uio->uio_resid = bufsize - n;
1431 }
1432
1433 out:
1434 kmem_free(buf, bufsize);
1435 return error;
1436 }
1437
1438 static int
1439 rump_vop_write(void *v)
1440 {
1441 struct vop_write_args /* {
1442 struct vnode *a_vp;
1443 struct uio *a_uio;
   1444 		int a_ioflag;
1445 kauth_cred_t a_cred;
1446 }; */ *ap = v;
1447 struct vnode *vp = ap->a_vp;
1448 struct rumpfs_node *rn = vp->v_data;
1449 struct uio *uio = ap->a_uio;
1450 const int advice = IO_ADV_DECODE(ap->a_ioflag);
1451 void *olddata;
1452 size_t oldlen, newlen;
1453 off_t chunk;
1454 int error = 0;
1455 bool allocd = false;
1456
1457 if (ap->a_ioflag & IO_APPEND)
1458 uio->uio_offset = vp->v_size;
1459
1460 /* consult et? */
1461 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1462 return etwrite(rn, uio);
1463
1464 /*
1465 * Otherwise, it's a case of ubcmove.
1466 */
1467
1468 /*
1469 * First, make sure we have enough storage.
1470 *
1471 * No, you don't need to tell me it's not very efficient.
1472 * No, it doesn't really support sparse files, just fakes it.
1473 */
1474 newlen = uio->uio_offset + uio->uio_resid;
1475 oldlen = 0; /* XXXgcc */
1476 olddata = NULL;
1477 if (rn->rn_dlen < newlen) {
1478 oldlen = rn->rn_dlen;
1479 olddata = rn->rn_data;
1480
1481 rn->rn_data = rump_hypermalloc(newlen, 0, true, "rumpfs");
1482 rn->rn_dlen = newlen;
1483 memset(rn->rn_data, 0, newlen);
1484 memcpy(rn->rn_data, olddata, oldlen);
1485 allocd = true;
1486 uvm_vnp_setsize(vp, newlen);
1487 }
1488
1489 /* ok, we have enough stooorage. write */
1490 while (uio->uio_resid > 0) {
1491 chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1492 if (chunk == 0)
1493 break;
1494 error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1495 UBC_WRITE | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1496 if (error)
1497 break;
1498 }
1499
1500 if (allocd) {
1501 if (error) {
1502 rump_hyperfree(rn->rn_data, newlen);
1503 rn->rn_data = olddata;
1504 rn->rn_dlen = oldlen;
1505 uvm_vnp_setsize(vp, oldlen);
1506 } else {
1507 rump_hyperfree(olddata, oldlen);
1508 }
1509 }
1510
1511 return error;
1512 }
1513
1514 static int
1515 rump_vop_bmap(void *v)
1516 {
1517 struct vop_bmap_args /* {
1518 struct vnode *a_vp;
1519 daddr_t a_bn;
1520 struct vnode **a_vpp;
1521 daddr_t *a_bnp;
1522 int *a_runp;
1523 } */ *ap = v;
1524
1525 /* 1:1 mapping */
1526 if (ap->a_vpp)
1527 *ap->a_vpp = ap->a_vp;
1528 if (ap->a_bnp)
1529 *ap->a_bnp = ap->a_bn;
1530 if (ap->a_runp)
1531 *ap->a_runp = 16;
1532
1533 return 0;
1534 }
1535
1536 static int
1537 rump_vop_strategy(void *v)
1538 {
1539 struct vop_strategy_args /* {
1540 struct vnode *a_vp;
1541 struct buf *a_bp;
1542 } */ *ap = v;
1543 struct vnode *vp = ap->a_vp;
1544 struct rumpfs_node *rn = vp->v_data;
1545 struct buf *bp = ap->a_bp;
1546 off_t copylen, copyoff;
1547 int error;
1548
1549 if (vp->v_type != VREG || rn->rn_flags & RUMPNODE_ET_PHONE_HOST) {
1550 error = EINVAL;
1551 goto out;
1552 }
1553
1554 copyoff = bp->b_blkno << DEV_BSHIFT;
1555 copylen = MIN(rn->rn_dlen - copyoff, bp->b_bcount);
1556 if (BUF_ISWRITE(bp)) {
1557 memcpy((uint8_t *)rn->rn_data + copyoff, bp->b_data, copylen);
1558 } else {
1559 memset((uint8_t*)bp->b_data + copylen, 0, bp->b_bcount-copylen);
1560 memcpy(bp->b_data, (uint8_t *)rn->rn_data + copyoff, copylen);
1561 }
1562 bp->b_resid = 0;
1563 error = 0;
1564
1565 out:
1566 bp->b_error = error;
1567 biodone(bp);
1568 return 0;
1569 }
1570
1571 static int
1572 rump_vop_pathconf(void *v)
1573 {
1574 struct vop_pathconf_args /* {
1575 struct vnode *a_vp;
1576 int a_name;
1577 register_t *a_retval;
1578 }; */ *ap = v;
1579 int name = ap->a_name;
1580 register_t *retval = ap->a_retval;
1581
1582 switch (name) {
1583 case _PC_LINK_MAX:
1584 *retval = LINK_MAX;
1585 return 0;
1586 case _PC_NAME_MAX:
1587 *retval = RUMPFS_MAXNAMLEN;
1588 return 0;
1589 case _PC_PATH_MAX:
1590 *retval = PATH_MAX;
1591 return 0;
1592 case _PC_PIPE_BUF:
1593 *retval = PIPE_BUF;
1594 return 0;
1595 case _PC_CHOWN_RESTRICTED:
1596 *retval = 1;
1597 return 0;
1598 case _PC_NO_TRUNC:
1599 *retval = 1;
1600 return 0;
1601 case _PC_SYNC_IO:
1602 *retval = 1;
1603 return 0;
1604 case _PC_FILESIZEBITS:
1605 *retval = 43; /* this one goes to 11 */
1606 return 0;
1607 case _PC_SYMLINK_MAX:
1608 *retval = MAXPATHLEN;
1609 return 0;
1610 case _PC_2_SYMLINKS:
1611 *retval = 1;
1612 return 0;
1613 default:
1614 return EINVAL;
1615 }
1616 }
1617
1618 static int
1619 rump_vop_success(void *v)
1620 {
1621
1622 return 0;
1623 }
1624
1625 static int
1626 rump_vop_inactive(void *v)
1627 {
1628 struct vop_inactive_args /* {
1629 struct vnode *a_vp;
1630 bool *a_recycle;
1631 } */ *ap = v;
1632 struct vnode *vp = ap->a_vp;
1633 struct rumpfs_node *rn = vp->v_data;
1634
1635 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST && vp->v_type == VREG) {
1636 if (rn->rn_readfd != -1) {
1637 rumpuser_close(rn->rn_readfd);
1638 rn->rn_readfd = -1;
1639 }
1640 if (rn->rn_writefd != -1) {
1641 rumpuser_close(rn->rn_writefd);
1642 rn->rn_writefd = -1;
1643 }
1644 }
1645 *ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;
1646
1647 VOP_UNLOCK(vp);
1648 return 0;
1649 }
1650
1651 static int
1652 rump_vop_reclaim(void *v)
1653 {
1654 struct vop_reclaim_args /* {
1655 struct vnode *a_vp;
1656 } */ *ap = v;
1657 struct vnode *vp = ap->a_vp;
1658 struct rumpfs_node *rn = vp->v_data;
1659
1660 mutex_enter(&reclock);
1661 rn->rn_vp = NULL;
1662 mutex_exit(&reclock);
1663 genfs_node_destroy(vp);
1664 vp->v_data = NULL;
1665
1666 if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
1667 if (vp->v_type == VREG
1668 && (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0
1669 && rn->rn_data) {
1670 rump_hyperfree(rn->rn_data, rn->rn_dlen);
1671 rn->rn_data = NULL;
1672 }
1673
1674 if (vp->v_type == VLNK)
1675 PNBUF_PUT(rn->rn_linktarg);
1676 if (rn->rn_hostpath)
1677 free(rn->rn_hostpath, M_TEMP);
1678 kmem_free(rn, sizeof(*rn));
1679 }
1680
1681 return 0;
1682 }
1683
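        /*
         * Dispatch for device vnodes: a handful of operations (access,
         * getattr/setattr, locking, reclaim) are handled by rumpfs itself,
         * everything else is passed through to specfs.
         */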
1684 static int
1685 rump_vop_spec(void *v)
1686 {
1687 struct vop_generic_args *ap = v;
1688 int (**opvec)(void *);
1689
1690 switch (ap->a_desc->vdesc_offset) {
1691 case VOP_ACCESS_DESCOFFSET:
1692 case VOP_GETATTR_DESCOFFSET:
1693 case VOP_SETATTR_DESCOFFSET:
1694 case VOP_LOCK_DESCOFFSET:
1695 case VOP_UNLOCK_DESCOFFSET:
1696 case VOP_ISLOCKED_DESCOFFSET:
1697 case VOP_RECLAIM_DESCOFFSET:
1698 opvec = rump_vnodeop_p;
1699 break;
1700 default:
1701 opvec = spec_vnodeop_p;
1702 break;
1703 }
1704
1705 return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
1706 }
1707
1708 static int
1709 rump_vop_advlock(void *v)
1710 {
1711 struct vop_advlock_args /* {
1712 const struct vnodeop_desc *a_desc;
1713 struct vnode *a_vp;
1714 void *a_id;
1715 int a_op;
1716 struct flock *a_fl;
1717 int a_flags;
1718 } */ *ap = v;
1719 struct vnode *vp = ap->a_vp;
1720 struct rumpfs_node *rn = vp->v_data;
1721
1722 return lf_advlock(ap, &rn->rn_lockf, vp->v_size);
1723 }
1724
1725 /*
1726 * Begin vfs-level stuff
1727 */
1728
1729 VFS_PROTOS(rumpfs);
1730 struct vfsops rumpfs_vfsops = {
1731 .vfs_name = MOUNT_RUMPFS,
1732 .vfs_min_mount_data = 0,
1733 .vfs_mount = rumpfs_mount,
1734 .vfs_start = (void *)nullop,
1735 .vfs_unmount = rumpfs_unmount,
1736 .vfs_root = rumpfs_root,
1737 .vfs_quotactl = (void *)eopnotsupp,
1738 .vfs_statvfs = genfs_statvfs,
1739 .vfs_sync = (void *)nullop,
1740 .vfs_vget = rumpfs_vget,
1741 .vfs_fhtovp = (void *)eopnotsupp,
1742 .vfs_vptofh = (void *)eopnotsupp,
1743 .vfs_init = rumpfs_init,
1744 .vfs_reinit = NULL,
1745 .vfs_done = rumpfs_done,
1746 .vfs_mountroot = rumpfs_mountroot,
1747 .vfs_snapshot = (void *)eopnotsupp,
1748 .vfs_extattrctl = (void *)eopnotsupp,
1749 .vfs_suspendctl = (void *)eopnotsupp,
1750 .vfs_renamelock_enter = genfs_renamelock_enter,
1751 .vfs_renamelock_exit = genfs_renamelock_exit,
1752 .vfs_opv_descs = rump_opv_descs,
1753 /* vfs_refcount */
1754 /* vfs_list */
1755 };
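        /*
         * Rough usage sketch, stated as an assumption rather than taken
         * from this file: besides serving as the root file system of a
         * rump kernel (rumpfs_mountroot() below), rumpfs can be mounted
         * like any other file system, e.g. from a client approximately as
         *
         *	rump_sys_mkdir("/scratch", 0777);
         *	rump_sys_mount(MOUNT_RUMPFS, "/scratch", 0, NULL, 0);
         *
         * rumpfs_mount() ignores its mount argument data, so NULL/0 suffice.
         */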
1756
1757 static int
1758 rumpfs_mountfs(struct mount *mp)
1759 {
1760 struct rumpfs_mount *rfsmp;
1761 struct rumpfs_node *rn;
1762 int error;
1763
1764 rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);
1765
1766 rn = makeprivate(VDIR, RUMPFS_DEFAULTMODE, NODEV, DEV_BSIZE, false);
1767 rn->rn_parent = rn;
1768 if ((error = makevnode(mp, rn, &rfsmp->rfsmp_rvp)) != 0)
1769 return error;
1770
1771 rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
1772 VOP_UNLOCK(rfsmp->rfsmp_rvp);
1773
1774 mp->mnt_data = rfsmp;
1775 mp->mnt_stat.f_namemax = RUMPFS_MAXNAMLEN;
1776 mp->mnt_stat.f_iosize = 512;
1777 mp->mnt_flag |= MNT_LOCAL;
1778 mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO;
1779 mp->mnt_fs_bshift = DEV_BSHIFT;
1780 vfs_getnewfsid(mp);
1781
1782 return 0;
1783 }
1784
1785 int
1786 rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
1787 {
1788 int error;
1789
1790 if (mp->mnt_flag & MNT_UPDATE) {
1791 return 0;
1792 }
1793
1794 error = set_statvfs_info(mntpath, UIO_USERSPACE, "rumpfs", UIO_SYSSPACE,
1795 mp->mnt_op->vfs_name, mp, curlwp);
1796 if (error)
1797 return error;
1798
1799 return rumpfs_mountfs(mp);
1800 }
1801
1802 int
1803 rumpfs_unmount(struct mount *mp, int mntflags)
1804 {
1805 struct rumpfs_mount *rfsmp = mp->mnt_data;
1806 int flags = 0, error;
1807
1808 if (panicstr || mntflags & MNT_FORCE)
1809 flags |= FORCECLOSE;
1810
1811 if ((error = vflush(mp, rfsmp->rfsmp_rvp, flags)) != 0)
1812 return error;
1813 vgone(rfsmp->rfsmp_rvp); /* XXX */
1814
1815 kmem_free(rfsmp, sizeof(*rfsmp));
1816
1817 return 0;
1818 }
1819
1820 int
1821 rumpfs_root(struct mount *mp, struct vnode **vpp)
1822 {
1823 struct rumpfs_mount *rfsmp = mp->mnt_data;
1824
1825 vref(rfsmp->rfsmp_rvp);
1826 vn_lock(rfsmp->rfsmp_rvp, LK_EXCLUSIVE | LK_RETRY);
1827 *vpp = rfsmp->rfsmp_rvp;
1828 return 0;
1829 }
1830
1831 int
1832 rumpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1833 {
1834
1835 return EOPNOTSUPP;
1836 }
1837
1838 void
1839 rumpfs_init()
1840 {
1841
1842 CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);
1843
1844 mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
1845 mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
1846 }
1847
1848 void
1849 rumpfs_done()
1850 {
1851
1852 mutex_destroy(&reclock);
1853 mutex_destroy(&etfs_lock);
1854 }
1855
1856 int
1857 rumpfs_mountroot()
1858 {
1859 struct mount *mp;
1860 int error;
1861
1862 if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
1863 vrele(rootvp);
1864 return error;
1865 }
1866
1867 if ((error = rumpfs_mountfs(mp)) != 0)
1868 panic("mounting rootfs failed: %d", error);
1869
1870 mutex_enter(&mountlist_lock);
1871 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1872 mutex_exit(&mountlist_lock);
1873
1874 error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
1875 mp->mnt_op->vfs_name, mp, curlwp);
1876 if (error)
1877 panic("set_statvfs_info failed for rootfs: %d", error);
1878
1879 mp->mnt_flag &= ~MNT_RDONLY;
1880 vfs_unbusy(mp, false, NULL);
1881
1882 return 0;
1883 }
1884