1 /* $NetBSD: rumpfs.c,v 1.113 2013/04/29 20:08:49 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2009, 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: rumpfs.c,v 1.113 2013/04/29 20:08:49 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/atomic.h>
33 #include <sys/buf.h>
34 #include <sys/dirent.h>
35 #include <sys/errno.h>
36 #include <sys/filedesc.h>
37 #include <sys/fcntl.h>
38 #include <sys/kauth.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/lock.h>
44 #include <sys/lockf.h>
45 #include <sys/queue.h>
46 #include <sys/stat.h>
47 #include <sys/syscallargs.h>
48 #include <sys/vnode.h>
49 #include <sys/unistd.h>
50
51 #include <miscfs/fifofs/fifo.h>
52 #include <miscfs/specfs/specdev.h>
53 #include <miscfs/genfs/genfs.h>
54 #include <miscfs/genfs/genfs_node.h>
55
56 #include <uvm/uvm_extern.h>
57
58 #include <rump/rumpuser.h>
59
60 #include "rump_private.h"
61 #include "rump_vfs_private.h"
62
63 static int rump_vop_lookup(void *);
64 static int rump_vop_getattr(void *);
65 static int rump_vop_setattr(void *);
66 static int rump_vop_mkdir(void *);
67 static int rump_vop_rmdir(void *);
68 static int rump_vop_remove(void *);
69 static int rump_vop_mknod(void *);
70 static int rump_vop_create(void *);
71 static int rump_vop_inactive(void *);
72 static int rump_vop_reclaim(void *);
73 static int rump_vop_success(void *);
74 static int rump_vop_readdir(void *);
75 static int rump_vop_spec(void *);
76 static int rump_vop_read(void *);
77 static int rump_vop_write(void *);
78 static int rump_vop_open(void *);
79 static int rump_vop_symlink(void *);
80 static int rump_vop_readlink(void *);
81 static int rump_vop_whiteout(void *);
82 static int rump_vop_pathconf(void *);
83 static int rump_vop_bmap(void *);
84 static int rump_vop_strategy(void *);
85 static int rump_vop_advlock(void *);
86 static int rump_vop_access(void *);
87
88 int (**fifo_vnodeop_p)(void *);
89 const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
90 { &vop_default_desc, vn_default_error },
91 { &vop_putpages_desc, genfs_null_putpages },
92 { NULL, NULL }
93 };
94 const struct vnodeopv_desc fifo_vnodeop_opv_desc =
95 { &fifo_vnodeop_p, fifo_vnodeop_entries };
96
97 int (**rump_vnodeop_p)(void *);
98 const struct vnodeopv_entry_desc rump_vnodeop_entries[] = {
99 { &vop_default_desc, vn_default_error },
100 { &vop_lookup_desc, rump_vop_lookup },
101 { &vop_getattr_desc, rump_vop_getattr },
102 { &vop_setattr_desc, rump_vop_setattr },
103 { &vop_mkdir_desc, rump_vop_mkdir },
104 { &vop_rmdir_desc, rump_vop_rmdir },
105 { &vop_remove_desc, rump_vop_remove },
106 { &vop_mknod_desc, rump_vop_mknod },
107 { &vop_create_desc, rump_vop_create },
108 { &vop_symlink_desc, rump_vop_symlink },
109 { &vop_readlink_desc, rump_vop_readlink },
110 { &vop_access_desc, rump_vop_access },
111 { &vop_readdir_desc, rump_vop_readdir },
112 { &vop_read_desc, rump_vop_read },
113 { &vop_write_desc, rump_vop_write },
114 { &vop_open_desc, rump_vop_open },
115 { &vop_close_desc, genfs_nullop },
116 { &vop_seek_desc, genfs_seek },
117 { &vop_getpages_desc, genfs_getpages },
118 { &vop_putpages_desc, genfs_putpages },
119 { &vop_whiteout_desc, rump_vop_whiteout },
120 { &vop_fsync_desc, rump_vop_success },
121 { &vop_lock_desc, genfs_lock },
122 { &vop_unlock_desc, genfs_unlock },
123 { &vop_islocked_desc, genfs_islocked },
124 { &vop_inactive_desc, rump_vop_inactive },
125 { &vop_reclaim_desc, rump_vop_reclaim },
126 { &vop_link_desc, genfs_eopnotsupp },
127 { &vop_pathconf_desc, rump_vop_pathconf },
128 { &vop_bmap_desc, rump_vop_bmap },
129 { &vop_strategy_desc, rump_vop_strategy },
130 { &vop_advlock_desc, rump_vop_advlock },
131 { NULL, NULL }
132 };
133 const struct vnodeopv_desc rump_vnodeop_opv_desc =
134 { &rump_vnodeop_p, rump_vnodeop_entries };
135
136 int (**rump_specop_p)(void *);
137 const struct vnodeopv_entry_desc rump_specop_entries[] = {
138 { &vop_default_desc, rump_vop_spec },
139 { NULL, NULL }
140 };
141 const struct vnodeopv_desc rump_specop_opv_desc =
142 { &rump_specop_p, rump_specop_entries };
143
144 const struct vnodeopv_desc * const rump_opv_descs[] = {
145 &rump_vnodeop_opv_desc,
146 &rump_specop_opv_desc,
147 NULL
148 };
149
150 #define RUMPFS_WHITEOUT ((void *)-1)
151 #define RDENT_ISWHITEOUT(rdp) (rdp->rd_node == RUMPFS_WHITEOUT)
152 struct rumpfs_dent {
153 char *rd_name;
154 int rd_namelen;
155 struct rumpfs_node *rd_node;
156
157 LIST_ENTRY(rumpfs_dent) rd_entries;
158 };
159
160 struct genfs_ops rumpfs_genfsops = {
161 .gop_size = genfs_size,
162 .gop_write = genfs_gop_write,
163
164 /* optional */
165 .gop_alloc = NULL,
166 .gop_markupdate = NULL,
167 };
168
169 struct rumpfs_node {
170 struct genfs_node rn_gn;
171 struct vattr rn_va;
172 struct vnode *rn_vp;
173 char *rn_hostpath;
174 int rn_flags;
175 struct lockf *rn_lockf;
176
177 union {
178 struct { /* VREG */
179 int readfd;
180 int writefd;
181 uint64_t offset;
182 } reg;
183 struct {
184 void *data;
185 size_t dlen;
186 } reg_noet;
187 struct { /* VDIR */
188 LIST_HEAD(, rumpfs_dent) dents;
189 struct rumpfs_node *parent;
190 int flags;
191 } dir;
192 struct {
193 char *target;
194 size_t len;
195 } link;
196 } rn_u;
197 };
198 #define rn_readfd rn_u.reg.readfd
199 #define rn_writefd rn_u.reg.writefd
200 #define rn_offset rn_u.reg.offset
201 #define rn_data rn_u.reg_noet.data
202 #define rn_dlen rn_u.reg_noet.dlen
203 #define rn_dir rn_u.dir.dents
204 #define rn_parent rn_u.dir.parent
205 #define rn_linktarg rn_u.link.target
206 #define rn_linklen rn_u.link.len
207
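/*
 * Flags kept in rn_flags, as used in this file: RUMPNODE_CANRECLAIM
 * allows the node's memory to be freed at reclaim time, RUMPNODE_DIR_ET
 * marks a directory mapped to a host directory, RUMPNODE_DIR_ETSUBS
 * extends that mapping to names looked up beneath it, and
 * RUMPNODE_ET_PHONE_HOST routes the node's I/O to the host file.
 */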
208 #define RUMPNODE_CANRECLAIM 0x01
209 #define RUMPNODE_DIR_ET 0x02
210 #define RUMPNODE_DIR_ETSUBS 0x04
211 #define RUMPNODE_ET_PHONE_HOST 0x10
212
213 struct rumpfs_mount {
214 struct vnode *rfsmp_rvp;
215 };
216
217 #define INO_WHITEOUT 1
218 static int lastino = 2;
219 static kmutex_t reclock;	/* protects rumpfs_node rn_vp back-pointers */
220
221 static void freedir(struct rumpfs_node *, struct componentname *);
222 static struct rumpfs_node *makeprivate(enum vtype, dev_t, off_t, bool);
223
224 /*
225 * Extra Terrestrial stuff. We map a given key (pathname) to a file on
226 * the host FS. ET phones home only from the root node of rumpfs.
227 *
228 * When an etfs node is removed, a vnode potentially behind it is not
229 * immediately recycled.
230 */
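/*
 * Illustrative example (hypothetical paths): exposing a host image file
 * as a block device inside the rump kernel could be done with
 *
 *	rump_etfs_register("/dev/hd0img", "/tmp/disk.img", RUMP_ETFS_BLK);
 *
 * after which a lookup of /dev/hd0img from the rumpfs root resolves to
 * the etfs node created by doregister() below.
 */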
231
232 struct etfs {
233 char et_key[MAXPATHLEN];
234 size_t et_keylen;
235 bool et_prefixkey;
236 bool et_removing;
237 devminor_t et_blkmin;
238
239 LIST_ENTRY(etfs) et_entries;
240
241 struct rumpfs_node *et_rn;
242 };
243 static kmutex_t etfs_lock;
244 static LIST_HEAD(, etfs) etfs_list = LIST_HEAD_INITIALIZER(etfs_list);
245
246 static enum vtype
247 ettype_to_vtype(enum rump_etfs_type et)
248 {
249 enum vtype vt;
250
251 switch (et) {
252 case RUMP_ETFS_REG:
253 vt = VREG;
254 break;
255 case RUMP_ETFS_BLK:
256 vt = VBLK;
257 break;
258 case RUMP_ETFS_CHR:
259 vt = VCHR;
260 break;
261 case RUMP_ETFS_DIR:
262 vt = VDIR;
263 break;
264 case RUMP_ETFS_DIR_SUBDIRS:
265 vt = VDIR;
266 break;
267 default:
268 panic("invalid et type: %d", et);
269 }
270
271 return vt;
272 }
273
274 static enum vtype
275 hft_to_vtype(int hft)
276 {
277 enum vtype vt;
278
279 switch (hft) {
280 case RUMPUSER_FT_OTHER:
281 vt = VNON;
282 break;
283 case RUMPUSER_FT_DIR:
284 vt = VDIR;
285 break;
286 case RUMPUSER_FT_REG:
287 vt = VREG;
288 break;
289 case RUMPUSER_FT_BLK:
290 vt = VBLK;
291 break;
292 case RUMPUSER_FT_CHR:
293 vt = VCHR;
294 break;
295 default:
296 vt = VNON;
297 break;
298 }
299
300 return vt;
301 }
302
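/*
 * A key matches an etfs entry if the first et_keylen bytes are equal and
 * either the lengths match exactly, the entry was registered with a
 * prefix key (directory registrations), or prefix matching is forced by
 * the caller.
 */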
303 static bool
304 etfs_find(const char *key, struct etfs **etp, bool forceprefix)
305 {
306 struct etfs *et;
307 size_t keylen = strlen(key);
308
309 KASSERT(mutex_owned(&etfs_lock));
310
311 LIST_FOREACH(et, &etfs_list, et_entries) {
312 if ((keylen == et->et_keylen || et->et_prefixkey || forceprefix)
313 && strncmp(key, et->et_key, et->et_keylen) == 0) {
314 if (etp)
315 *etp = et;
316 return true;
317 }
318 }
319
320 return false;
321 }
322
323 #define REGDIR(ftype) \
324 ((ftype) == RUMP_ETFS_DIR || (ftype) == RUMP_ETFS_DIR_SUBDIRS)
325 static int
326 doregister(const char *key, const char *hostpath,
327 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
328 {
329 char buf[9];
330 struct etfs *et;
331 struct rumpfs_node *rn;
332 uint64_t fsize;
333 dev_t rdev = NODEV;
334 devminor_t dmin = -1;
335 int hft, error;
336
337 if (key[0] != '/') {
338 return EINVAL;
339 }
340 while (key[0] == '/') {
341 key++;
342 }
343
344 if (rumpuser_getfileinfo(hostpath, &fsize, &hft, &error))
345 return error;
346
347 /* etfs directory requires a directory on the host */
348 if (REGDIR(ftype)) {
349 if (hft != RUMPUSER_FT_DIR)
350 return ENOTDIR;
351 if (begin != 0)
352 return EISDIR;
353 if (size != RUMP_ETFS_SIZE_ENDOFF)
354 return EISDIR;
355 size = fsize;
356 } else {
357 if (begin > fsize)
358 return EINVAL;
359 if (size == RUMP_ETFS_SIZE_ENDOFF)
360 size = fsize - begin;
361 if (begin + size > fsize)
362 return EINVAL;
363 }
364
365 if (ftype == RUMP_ETFS_BLK || ftype == RUMP_ETFS_CHR) {
366 error = rumpblk_register(hostpath, &dmin, begin, size);
367 if (error != 0) {
368 return error;
369 }
370 rdev = makedev(RUMPBLK_DEVMAJOR, dmin);
371 }
372
373 et = kmem_alloc(sizeof(*et), KM_SLEEP);
374 strcpy(et->et_key, key);
375 et->et_keylen = strlen(et->et_key);
376 et->et_rn = rn = makeprivate(ettype_to_vtype(ftype), rdev, size, true);
377 et->et_removing = false;
378 et->et_blkmin = dmin;
379
380 rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
381
382 if (ftype == RUMP_ETFS_REG || REGDIR(ftype) || et->et_blkmin != -1) {
383 size_t len = strlen(hostpath)+1;
384
385 rn->rn_hostpath = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
386 memcpy(rn->rn_hostpath, hostpath, len);
387 rn->rn_offset = begin;
388 }
389
390 if (REGDIR(ftype)) {
391 rn->rn_flags |= RUMPNODE_DIR_ET;
392 et->et_prefixkey = true;
393 } else {
394 et->et_prefixkey = false;
395 }
396
397 if (ftype == RUMP_ETFS_DIR_SUBDIRS)
398 rn->rn_flags |= RUMPNODE_DIR_ETSUBS;
399
400 mutex_enter(&etfs_lock);
401 if (etfs_find(key, NULL, REGDIR(ftype))) {
402 mutex_exit(&etfs_lock);
403 if (et->et_blkmin != -1)
404 rumpblk_deregister(hostpath);
405 if (et->et_rn->rn_hostpath != NULL)
406 free(et->et_rn->rn_hostpath, M_TEMP);
407 kmem_free(et->et_rn, sizeof(*et->et_rn));
408 kmem_free(et, sizeof(*et));
409 return EEXIST;
410 }
411 LIST_INSERT_HEAD(&etfs_list, et, et_entries);
412 mutex_exit(&etfs_lock);
413
414 if (ftype == RUMP_ETFS_BLK) {
415 format_bytes(buf, sizeof(buf), size);
416 aprint_verbose("/%s: hostpath %s (%s)\n", key, hostpath, buf);
417 }
418
419 return 0;
420 }
421 #undef REGDIR
422
423 int
424 rump_etfs_register(const char *key, const char *hostpath,
425 enum rump_etfs_type ftype)
426 {
427
428 return doregister(key, hostpath, ftype, 0, RUMP_ETFS_SIZE_ENDOFF);
429 }
430
431 int
432 rump_etfs_register_withsize(const char *key, const char *hostpath,
433 enum rump_etfs_type ftype, uint64_t begin, uint64_t size)
434 {
435
436 return doregister(key, hostpath, ftype, begin, size);
437 }
438
439 /* remove etfs mapping. caller's responsibility to make sure it's not in use */
440 int
441 rump_etfs_remove(const char *key)
442 {
443 struct etfs *et;
444 size_t keylen;
445 int rv;
446
447 if (key[0] != '/') {
448 return EINVAL;
449 }
450 while (key[0] == '/') {
451 key++;
452 }
453
454 keylen = strlen(key);
455
456 mutex_enter(&etfs_lock);
457 LIST_FOREACH(et, &etfs_list, et_entries) {
458 if (keylen == et->et_keylen && strcmp(et->et_key, key) == 0) {
459 if (et->et_removing)
460 et = NULL;
461 else
462 et->et_removing = true;
463 break;
464 }
465 }
466 mutex_exit(&etfs_lock);
467 if (!et)
468 return ENOENT;
469
470 /*
471 * ok, we know what we want to remove and have signalled there
472 * actually are men at work. first, unregister from rumpblk
473 */
474 if (et->et_blkmin != -1) {
475 rv = rumpblk_deregister(et->et_rn->rn_hostpath);
476 } else {
477 rv = 0;
478 }
479 KASSERT(rv == 0);
480
481 /* then do the actual removal */
482 mutex_enter(&etfs_lock);
483 LIST_REMOVE(et, et_entries);
484 mutex_exit(&etfs_lock);
485
486 /* node is unreachable, safe to nuke all device copies */
487 if (et->et_blkmin != -1) {
488 vdevgone(RUMPBLK_DEVMAJOR, et->et_blkmin, et->et_blkmin, VBLK);
489 } else {
490 struct vnode *vp;
491
492 mutex_enter(&reclock);
493 if ((vp = et->et_rn->rn_vp) != NULL)
494 mutex_enter(vp->v_interlock);
495 mutex_exit(&reclock);
496 if (vp && vget(vp, 0) == 0)
497 vgone(vp);
498 }
499
500 if (et->et_rn->rn_hostpath != NULL)
501 free(et->et_rn->rn_hostpath, M_TEMP);
502 kmem_free(et->et_rn, sizeof(*et->et_rn));
503 kmem_free(et, sizeof(*et));
504
505 return 0;
506 }
507
508 /*
509 * rumpfs
510 */
511
512 static struct rumpfs_node *
513 makeprivate(enum vtype vt, dev_t rdev, off_t size, bool et)
514 {
515 struct rumpfs_node *rn;
516 struct vattr *va;
517 struct timespec ts;
518
519 rn = kmem_zalloc(sizeof(*rn), KM_SLEEP);
520
521 switch (vt) {
522 case VDIR:
523 LIST_INIT(&rn->rn_dir);
524 break;
525 case VREG:
526 if (et) {
527 rn->rn_readfd = -1;
528 rn->rn_writefd = -1;
529 }
530 break;
531 default:
532 break;
533 }
534
535 nanotime(&ts);
536
537 va = &rn->rn_va;
538 va->va_type = vt;
539 va->va_mode = 0755;
540 if (vt == VDIR)
541 va->va_nlink = 2;
542 else
543 va->va_nlink = 1;
544 va->va_uid = 0;
545 va->va_gid = 0;
546 va->va_fsid =
547 va->va_fileid = atomic_inc_uint_nv(&lastino);
548 va->va_size = size;
549 va->va_blocksize = 512;
550 va->va_atime = ts;
551 va->va_mtime = ts;
552 va->va_ctime = ts;
553 va->va_birthtime = ts;
554 va->va_gen = 0;
555 va->va_flags = 0;
556 va->va_rdev = rdev;
557 va->va_bytes = 512;
558 va->va_filerev = 0;
559 va->va_vaflags = 0;
560
561 return rn;
562 }
563
564 static int
565 makevnode(struct mount *mp, struct rumpfs_node *rn, struct vnode **vpp)
566 {
567 struct vnode *vp;
568 int (**vpops)(void *);
569 struct vattr *va = &rn->rn_va;
570 int rv;
571
572 KASSERT(!mutex_owned(&reclock));
573
574 if (va->va_type == VCHR || va->va_type == VBLK) {
575 vpops = rump_specop_p;
576 } else {
577 vpops = rump_vnodeop_p;
578 }
579
580 rv = getnewvnode(VT_RUMP, mp, vpops, NULL, &vp);
581 if (rv)
582 return rv;
583
584 vp->v_size = vp->v_writesize = va->va_size;
585 vp->v_type = va->va_type;
586
587 if (vpops == rump_specop_p) {
588 spec_node_init(vp, va->va_rdev);
589 }
590 vp->v_data = rn;
591
592 genfs_node_init(vp, &rumpfs_genfsops);
593 vn_lock(vp, LK_RETRY | LK_EXCLUSIVE);
594 mutex_enter(&reclock);
595 rn->rn_vp = vp;
596 mutex_exit(&reclock);
597
598 *vpp = vp;
599
600 return 0;
601 }
602
603
604 static void
605 makedir(struct rumpfs_node *rnd,
606 struct componentname *cnp, struct rumpfs_node *rn)
607 {
608 struct rumpfs_dent *rdent;
609
610 rdent = kmem_alloc(sizeof(*rdent), KM_SLEEP);
611 rdent->rd_name = kmem_alloc(cnp->cn_namelen+1, KM_SLEEP);
612 rdent->rd_node = rn;
613 strlcpy(rdent->rd_name, cnp->cn_nameptr, cnp->cn_namelen+1);
614 rdent->rd_namelen = strlen(rdent->rd_name);
615
616 if ((cnp->cn_flags & ISWHITEOUT) != 0) {
617 KASSERT((cnp->cn_flags & DOWHITEOUT) == 0);
618 freedir(rnd, cnp);
619 }
620 LIST_INSERT_HEAD(&rnd->rn_dir, rdent, rd_entries);
621 }
622
623 static void
624 freedir(struct rumpfs_node *rnd, struct componentname *cnp)
625 {
626 struct rumpfs_dent *rd = NULL;
627
628 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
629 if (rd->rd_namelen == cnp->cn_namelen &&
630 strncmp(rd->rd_name, cnp->cn_nameptr,
631 cnp->cn_namelen) == 0)
632 break;
633 }
634 if (rd == NULL)
635 panic("could not find directory entry: %s", cnp->cn_nameptr);
636
637 if (cnp->cn_flags & DOWHITEOUT) {
638 rd->rd_node = RUMPFS_WHITEOUT;
639 } else {
640 LIST_REMOVE(rd, rd_entries);
641 kmem_free(rd->rd_name, rd->rd_namelen+1);
642 kmem_free(rd, sizeof(*rd));
643 }
644 }
645
646 /*
647 * Simple lookup for rump file systems.
648 *
649 * uhm, this is twisted. C F C C, hope of C C F C looming
650 */
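/*
 * Outline: after the access and read-only checks, "." is handled in
 * place, rename is refused, etfs keys are consulted when looking up
 * from the root vnode, host-mapped (ET) directories are resolved
 * against the host, and everything else is a linear scan of the
 * in-memory dirent list, ending in vnode creation or revival under
 * reclock.
 */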
651 static int
652 rump_vop_lookup(void *v)
653 {
654 struct vop_lookup_args /* {
655 struct vnode *a_dvp;
656 struct vnode **a_vpp;
657 struct componentname *a_cnp;
658 }; */ *ap = v;
659 struct componentname *cnp = ap->a_cnp;
660 struct vnode *dvp = ap->a_dvp;
661 struct vnode **vpp = ap->a_vpp;
662 struct vnode *vp;
663 struct rumpfs_node *rnd = dvp->v_data, *rn;
664 struct rumpfs_dent *rd = NULL;
665 struct etfs *et;
666 bool dotdot = (cnp->cn_flags & ISDOTDOT) != 0;
667 int rv = 0;
668 const char *cp;
669
670 *vpp = NULL;
671
672 rv = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
673 if (rv)
674 return rv;
675
676 if ((cnp->cn_flags & ISLASTCN)
677 && (dvp->v_mount->mnt_flag & MNT_RDONLY)
678 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
679 return EROFS;
680
681 /* check for dot, return directly if the case */
682 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
683 vref(dvp);
684 *vpp = dvp;
685 return 0;
686 }
687
688 /* we don't do rename */
689 if (!(((cnp->cn_flags & ISLASTCN) == 0) || (cnp->cn_nameiop != RENAME)))
690 return EOPNOTSUPP;
691
692 /* check for etfs */
693 if (dvp == rootvnode &&
694 (cnp->cn_nameiop == LOOKUP || cnp->cn_nameiop == CREATE)) {
695 bool found;
696 mutex_enter(&etfs_lock);
697 found = etfs_find(cnp->cn_nameptr, &et, false);
698 mutex_exit(&etfs_lock);
699
700 if (found) {
701 rn = et->et_rn;
702 cnp->cn_consume += et->et_keylen - cnp->cn_namelen;
703 /*
704 * consume trailing slashes if any and clear
705 * REQUIREDIR if we consumed the full path.
706 */
707 cp = &cnp->cn_nameptr[cnp->cn_namelen];
708 cp += cnp->cn_consume;
709 KASSERT(*cp == '\0' || *cp == '/');
710 if (*cp == '\0' && rn->rn_va.va_type != VDIR)
711 cnp->cn_flags &= ~REQUIREDIR;
712 while (*cp++ == '/')
713 cnp->cn_consume++;
714 goto getvnode;
715 }
716 }
717
718 if (rnd->rn_flags & RUMPNODE_DIR_ET) {
719 uint64_t fsize;
720 char *newpath;
721 size_t newpathlen;
722 int hft, error;
723
724 if (dotdot)
725 return EOPNOTSUPP;
726
727 newpathlen = strlen(rnd->rn_hostpath) + 1 + cnp->cn_namelen + 1;
728 newpath = malloc(newpathlen, M_TEMP, M_WAITOK);
729
730 strlcpy(newpath, rnd->rn_hostpath, newpathlen);
731 strlcat(newpath, "/", newpathlen);
732 strlcat(newpath, cnp->cn_nameptr, newpathlen);
733
734 if (rumpuser_getfileinfo(newpath, &fsize, &hft, &error)) {
735 free(newpath, M_TEMP);
736 return error;
737 }
738
739 /* allow only dirs and regular files */
740 if (hft != RUMPUSER_FT_REG && hft != RUMPUSER_FT_DIR) {
741 free(newpath, M_TEMP);
742 return ENOENT;
743 }
744
745 rn = makeprivate(hft_to_vtype(hft), NODEV, fsize, true);
746 rn->rn_flags |= RUMPNODE_CANRECLAIM;
747 if (rnd->rn_flags & RUMPNODE_DIR_ETSUBS) {
748 rn->rn_flags |= RUMPNODE_DIR_ET | RUMPNODE_DIR_ETSUBS;
749 rn->rn_flags |= RUMPNODE_ET_PHONE_HOST;
750 }
751 rn->rn_hostpath = newpath;
752
753 goto getvnode;
754 } else {
755 if (dotdot) {
756 if ((rn = rnd->rn_parent) != NULL)
757 goto getvnode;
758 } else {
759 LIST_FOREACH(rd, &rnd->rn_dir, rd_entries) {
760 if (rd->rd_namelen == cnp->cn_namelen &&
761 strncmp(rd->rd_name, cnp->cn_nameptr,
762 cnp->cn_namelen) == 0)
763 break;
764 }
765 }
766 }
767
768 if (!rd && ((cnp->cn_flags & ISLASTCN) == 0||cnp->cn_nameiop != CREATE))
769 return ENOENT;
770
771 if (!rd && (cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
772 if (dvp->v_mount->mnt_flag & MNT_RDONLY)
773 return EROFS;
774 rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
775 if (rv)
776 return rv;
777 return EJUSTRETURN;
778 }
779
780 if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == DELETE) {
781 rv = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
782 if (rv)
783 return rv;
784 }
785
786 if (RDENT_ISWHITEOUT(rd)) {
787 cnp->cn_flags |= ISWHITEOUT;
788 if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE)
789 return EJUSTRETURN;
790 return ENOENT;
791 }
792
793 rn = rd->rd_node;
794
795 getvnode:
796 KASSERT(rn);
797 if (dotdot)
798 VOP_UNLOCK(dvp);
799 mutex_enter(&reclock);
800 if ((vp = rn->rn_vp)) {
801 mutex_enter(vp->v_interlock);
802 mutex_exit(&reclock);
803 if (vget(vp, LK_EXCLUSIVE)) {
804 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
805 goto getvnode;
806 }
807 *vpp = vp;
808 } else {
809 mutex_exit(&reclock);
810 rv = makevnode(dvp->v_mount, rn, vpp);
811 }
812 if (dotdot)
813 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
814
815 return rv;
816 }
817
818 static int
819 rump_check_possible(struct vnode *vp, struct rumpfs_node *rnode,
820 mode_t mode)
821 {
822
823 if ((mode & VWRITE) == 0)
824 return 0;
825
826 switch (vp->v_type) {
827 case VDIR:
828 case VLNK:
829 case VREG:
830 break;
831 default:
832 /* special file is always writable. */
833 return 0;
834 }
835
836 return vp->v_mount->mnt_flag & MNT_RDONLY ? EROFS : 0;
837 }
838
839 static int
840 rump_check_permitted(struct vnode *vp, struct rumpfs_node *rnode,
841 mode_t mode, kauth_cred_t cred)
842 {
843 struct vattr *attr = &rnode->rn_va;
844
845 return genfs_can_access(vp->v_type, attr->va_mode, attr->va_uid,
846 attr->va_gid, mode, cred);
847 }
848
849 int
850 rump_vop_access(void *v)
851 {
852 struct vop_access_args /* {
853 const struct vnodeop_desc *a_desc;
854 struct vnode *a_vp;
855 int a_mode;
856 kauth_cred_t a_cred;
857 } */ *ap = v;
858 struct vnode *vp = ap->a_vp;
859 struct rumpfs_node *rn = vp->v_data;
860 int error;
861
862 error = rump_check_possible(vp, rn, ap->a_mode);
863 if (error)
864 return error;
865
866 error = rump_check_permitted(vp, rn, ap->a_mode, ap->a_cred);
867
868 return error;
869 }
870
871 static int
872 rump_vop_getattr(void *v)
873 {
874 struct vop_getattr_args /* {
875 struct vnode *a_vp;
876 struct vattr *a_vap;
877 kauth_cred_t a_cred;
878 } */ *ap = v;
879 struct vnode *vp = ap->a_vp;
880 struct rumpfs_node *rn = vp->v_data;
881 struct vattr *vap = ap->a_vap;
882
883 memcpy(vap, &rn->rn_va, sizeof(struct vattr));
884 vap->va_size = vp->v_size;
885 return 0;
886 }
887
888 static int
889 rump_vop_setattr(void *v)
890 {
891 struct vop_setattr_args /* {
892 struct vnode *a_vp;
893 struct vattr *a_vap;
894 kauth_cred_t a_cred;
895 } */ *ap = v;
896 struct vnode *vp = ap->a_vp;
897 struct vattr *vap = ap->a_vap;
898 struct rumpfs_node *rn = vp->v_data;
899 struct vattr *attr = &rn->rn_va;
900 kauth_cred_t cred = ap->a_cred;
901 int error;
902
903 #define CHANGED(a, t) (vap->a != (t)VNOVAL)
904 #define SETIFVAL(a,t) if (CHANGED(a, t)) rn->rn_va.a = vap->a
905 if (CHANGED(va_atime.tv_sec, time_t) ||
906 CHANGED(va_ctime.tv_sec, time_t) ||
907 CHANGED(va_mtime.tv_sec, time_t) ||
908 CHANGED(va_birthtime.tv_sec, time_t) ||
909 CHANGED(va_atime.tv_nsec, long) ||
910 CHANGED(va_ctime.tv_nsec, long) ||
911 CHANGED(va_mtime.tv_nsec, long) ||
912 CHANGED(va_birthtime.tv_nsec, long)) {
913 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp,
914 NULL, genfs_can_chtimes(vp, vap->va_vaflags, attr->va_uid,
915 cred));
916 if (error)
917 return error;
918 }
919
920 SETIFVAL(va_atime.tv_sec, time_t);
921 SETIFVAL(va_ctime.tv_sec, time_t);
922 SETIFVAL(va_mtime.tv_sec, time_t);
923 SETIFVAL(va_birthtime.tv_sec, time_t);
924 SETIFVAL(va_atime.tv_nsec, long);
925 SETIFVAL(va_ctime.tv_nsec, long);
926 SETIFVAL(va_mtime.tv_nsec, long);
927 SETIFVAL(va_birthtime.tv_nsec, long);
928
929 if (CHANGED(va_flags, u_long)) {
930 /* XXX Can we handle system flags here...? */
931 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_FLAGS, vp,
932 NULL, genfs_can_chflags(cred, vp->v_type, attr->va_uid,
933 false));
934 if (error)
935 return error;
936 }
937
938 SETIFVAL(va_flags, u_long);
939 #undef SETIFVAL
940 #undef CHANGED
941
942 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
943 uid_t uid =
944 (vap->va_uid != (uid_t)VNOVAL) ? vap->va_uid : attr->va_uid;
945 gid_t gid =
946 (vap->va_gid != (gid_t)VNOVAL) ? vap->va_gid : attr->va_gid;
947 error = kauth_authorize_vnode(cred,
948 KAUTH_VNODE_CHANGE_OWNERSHIP, vp, NULL,
949 genfs_can_chown(cred, attr->va_uid, attr->va_gid, uid,
950 gid));
951 if (error)
952 return error;
953 attr->va_uid = uid;
954 attr->va_gid = gid;
955 }
956
957 if (vap->va_mode != (mode_t)VNOVAL) {
958 mode_t mode = vap->va_mode;
959 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY,
960 vp, NULL, genfs_can_chmod(vp->v_type, cred, attr->va_uid,
961 attr->va_gid, mode));
962 if (error)
963 return error;
964 attr->va_mode = mode;
965 }
966
967 if (vp->v_type == VREG &&
968 vap->va_size != VSIZENOTSET &&
969 vap->va_size != rn->rn_dlen) {
970 void *newdata;
971 size_t copylen, newlen;
972
973 newlen = vap->va_size;
974 newdata = rump_hypermalloc(newlen, 0, true, "rumpfs");
975
976 copylen = MIN(rn->rn_dlen, newlen);
977 memset(newdata, 0, newlen);
978 memcpy(newdata, rn->rn_data, copylen);
979 rump_hyperfree(rn->rn_data, rn->rn_dlen);
980
981 rn->rn_data = newdata;
982 rn->rn_dlen = newlen;
983 uvm_vnp_setsize(vp, newlen);
984 }
985 return 0;
986 }
987
988 static int
989 rump_vop_mkdir(void *v)
990 {
991 struct vop_mkdir_args /* {
992 struct vnode *a_dvp;
993 struct vnode **a_vpp;
994 struct componentname *a_cnp;
995 struct vattr *a_vap;
996 }; */ *ap = v;
997 struct vnode *dvp = ap->a_dvp;
998 struct vnode **vpp = ap->a_vpp;
999 struct componentname *cnp = ap->a_cnp;
1000 struct rumpfs_node *rnd = dvp->v_data, *rn;
1001 int rv = 0;
1002
1003 rn = makeprivate(VDIR, NODEV, DEV_BSIZE, false);
1004 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1005 rn->rn_va.va_flags |= UF_OPAQUE;
1006 rn->rn_parent = rnd;
1007 rv = makevnode(dvp->v_mount, rn, vpp);
1008 if (rv)
1009 goto out;
1010
1011 makedir(rnd, cnp, rn);
1012
1013 out:
1014 vput(dvp);
1015 return rv;
1016 }
1017
1018 static int
1019 rump_vop_rmdir(void *v)
1020 {
1021 struct vop_rmdir_args /* {
1022 struct vnode *a_dvp;
1023 struct vnode *a_vp;
1024 struct componentname *a_cnp;
1025 }; */ *ap = v;
1026 struct vnode *dvp = ap->a_dvp;
1027 struct vnode *vp = ap->a_vp;
1028 struct componentname *cnp = ap->a_cnp;
1029 struct rumpfs_node *rnd = dvp->v_data;
1030 struct rumpfs_node *rn = vp->v_data;
1031 struct rumpfs_dent *rd;
1032 int rv = 0;
1033
1034 LIST_FOREACH(rd, &rn->rn_dir, rd_entries) {
1035 if (rd->rd_node != RUMPFS_WHITEOUT) {
1036 rv = ENOTEMPTY;
1037 goto out;
1038 }
1039 }
1040 while ((rd = LIST_FIRST(&rn->rn_dir)) != NULL) {
1041 KASSERT(rd->rd_node == RUMPFS_WHITEOUT);
1042 LIST_REMOVE(rd, rd_entries);
1043 kmem_free(rd->rd_name, rd->rd_namelen+1);
1044 kmem_free(rd, sizeof(*rd));
1045 }
1046
1047 freedir(rnd, cnp);
1048 rn->rn_flags |= RUMPNODE_CANRECLAIM;
1049 rn->rn_parent = NULL;
1050
1051 out:
1052 vput(dvp);
1053 vput(vp);
1054
1055 return rv;
1056 }
1057
1058 static int
1059 rump_vop_remove(void *v)
1060 {
1061 struct vop_remove_args /* {
1062 struct vnode *a_dvp;
1063 struct vnode *a_vp;
1064 struct componentname *a_cnp;
1065 }; */ *ap = v;
1066 struct vnode *dvp = ap->a_dvp;
1067 struct vnode *vp = ap->a_vp;
1068 struct componentname *cnp = ap->a_cnp;
1069 struct rumpfs_node *rnd = dvp->v_data;
1070 struct rumpfs_node *rn = vp->v_data;
1071 int rv = 0;
1072
1073 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1074 return EOPNOTSUPP;
1075
1076 if (vp->v_type == VREG) {
1077 rump_hyperfree(rn->rn_data, rn->rn_dlen);
1078 }
1079
1080 freedir(rnd, cnp);
1081 rn->rn_flags |= RUMPNODE_CANRECLAIM;
1082
1083 vput(dvp);
1084 vput(vp);
1085
1086 return rv;
1087 }
1088
1089 static int
1090 rump_vop_mknod(void *v)
1091 {
1092 struct vop_mknod_args /* {
1093 struct vnode *a_dvp;
1094 struct vnode **a_vpp;
1095 struct componentname *a_cnp;
1096 struct vattr *a_vap;
1097 }; */ *ap = v;
1098 struct vnode *dvp = ap->a_dvp;
1099 struct vnode **vpp = ap->a_vpp;
1100 struct componentname *cnp = ap->a_cnp;
1101 struct vattr *va = ap->a_vap;
1102 struct rumpfs_node *rnd = dvp->v_data, *rn;
1103 int rv;
1104
1105 rn = makeprivate(va->va_type, va->va_rdev, DEV_BSIZE, false);
1106 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1107 rn->rn_va.va_flags |= UF_OPAQUE;
1108 rv = makevnode(dvp->v_mount, rn, vpp);
1109 if (rv)
1110 goto out;
1111
1112 makedir(rnd, cnp, rn);
1113
1114 out:
1115 vput(dvp);
1116 return rv;
1117 }
1118
1119 static int
1120 rump_vop_create(void *v)
1121 {
1122 struct vop_create_args /* {
1123 struct vnode *a_dvp;
1124 struct vnode **a_vpp;
1125 struct componentname *a_cnp;
1126 struct vattr *a_vap;
1127 }; */ *ap = v;
1128 struct vnode *dvp = ap->a_dvp;
1129 struct vnode **vpp = ap->a_vpp;
1130 struct componentname *cnp = ap->a_cnp;
1131 struct vattr *va = ap->a_vap;
1132 struct rumpfs_node *rnd = dvp->v_data, *rn;
1133 off_t newsize;
1134 int rv;
1135
1136 newsize = va->va_type == VSOCK ? DEV_BSIZE : 0;
1137 rn = makeprivate(va->va_type, NODEV, newsize, false);
1138 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1139 rn->rn_va.va_flags |= UF_OPAQUE;
1140 rv = makevnode(dvp->v_mount, rn, vpp);
1141 if (rv)
1142 goto out;
1143
1144 makedir(rnd, cnp, rn);
1145
1146 out:
1147 vput(dvp);
1148 return rv;
1149 }
1150
1151 static int
1152 rump_vop_symlink(void *v)
1153 {
1154 struct vop_symlink_args /* {
1155 struct vnode *a_dvp;
1156 struct vnode **a_vpp;
1157 struct componentname *a_cnp;
1158 struct vattr *a_vap;
1159 char *a_target;
1160 }; */ *ap = v;
1161 struct vnode *dvp = ap->a_dvp;
1162 struct vnode **vpp = ap->a_vpp;
1163 struct componentname *cnp = ap->a_cnp;
1164 struct rumpfs_node *rnd = dvp->v_data, *rn;
1165 const char *target = ap->a_target;
1166 size_t linklen;
1167 int rv;
1168
1169 linklen = strlen(target);
1170 KASSERT(linklen < MAXPATHLEN);
1171 rn = makeprivate(VLNK, NODEV, linklen, false);
1172 if ((cnp->cn_flags & ISWHITEOUT) != 0)
1173 rn->rn_va.va_flags |= UF_OPAQUE;
1174 rv = makevnode(dvp->v_mount, rn, vpp);
1175 if (rv)
1176 goto out;
1177
1178 makedir(rnd, cnp, rn);
1179
1180 KASSERT(linklen < MAXPATHLEN);
1181 rn->rn_linktarg = PNBUF_GET();
1182 rn->rn_linklen = linklen;
1183 strcpy(rn->rn_linktarg, target);
1184
1185 out:
1186 vput(dvp);
1187 return rv;
1188 }
1189
1190 static int
1191 rump_vop_readlink(void *v)
1192 {
1193 struct vop_readlink_args /* {
1194 struct vnode *a_vp;
1195 struct uio *a_uio;
1196 kauth_cred_t a_cred;
1197 }; */ *ap = v;
1198 struct vnode *vp = ap->a_vp;
1199 struct rumpfs_node *rn = vp->v_data;
1200 struct uio *uio = ap->a_uio;
1201
1202 return uiomove(rn->rn_linktarg, rn->rn_linklen, uio);
1203 }
1204
1205 static int
1206 rump_vop_whiteout(void *v)
1207 {
1208 struct vop_whiteout_args /* {
1209 struct vnode *a_dvp;
1210 struct componentname *a_cnp;
1211 int a_flags;
1212 } */ *ap = v;
1213 struct vnode *dvp = ap->a_dvp;
1214 struct rumpfs_node *rnd = dvp->v_data;
1215 struct componentname *cnp = ap->a_cnp;
1216 int flags = ap->a_flags;
1217
1218 switch (flags) {
1219 case LOOKUP:
1220 break;
1221 case CREATE:
1222 makedir(rnd, cnp, RUMPFS_WHITEOUT);
1223 break;
1224 case DELETE:
1225 cnp->cn_flags &= ~DOWHITEOUT; /* cargo culting never fails ? */
1226 freedir(rnd, cnp);
1227 break;
1228 default:
1229 panic("unknown whiteout op %d", flags);
1230 }
1231
1232 return 0;
1233 }
1234
1235 static int
1236 rump_vop_open(void *v)
1237 {
1238 struct vop_open_args /* {
1239 struct vnode *a_vp;
1240 int a_mode;
1241 kauth_cred_t a_cred;
1242 } */ *ap = v;
1243 struct vnode *vp = ap->a_vp;
1244 struct rumpfs_node *rn = vp->v_data;
1245 int mode = ap->a_mode;
1246 int error = EINVAL;
1247
1248 if (vp->v_type != VREG || (rn->rn_flags & RUMPNODE_ET_PHONE_HOST) == 0)
1249 return 0;
1250
1251 if (mode & FREAD) {
1252 if (rn->rn_readfd != -1)
1253 return 0;
1254 rn->rn_readfd = rumpuser_open(rn->rn_hostpath,
1255 RUMPUSER_OPEN_RDONLY, &error);
1256 }
1257
1258 if (mode & FWRITE) {
1259 if (rn->rn_writefd != -1)
1260 return 0;
1261 rn->rn_writefd = rumpuser_open(rn->rn_hostpath,
1262 RUMPUSER_OPEN_WRONLY, &error);
1263 }
1264
1265 return error;
1266 }
1267
1268 /* simple readdir. even omits dotstuff and periods */
1269 static int
1270 rump_vop_readdir(void *v)
1271 {
1272 struct vop_readdir_args /* {
1273 struct vnode *a_vp;
1274 struct uio *a_uio;
1275 kauth_cred_t a_cred;
1276 int *a_eofflag;
1277 off_t **a_cookies;
1278 int *a_ncookies;
1279 } */ *ap = v;
1280 struct vnode *vp = ap->a_vp;
1281 struct uio *uio = ap->a_uio;
1282 struct rumpfs_node *rnd = vp->v_data;
1283 struct rumpfs_dent *rdent;
1284 unsigned i;
1285 int rv = 0;
1286
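	/*
	 * uio_offset is treated as an index into the dirent list rather
	 * than a byte offset: it is advanced once per entry returned and
	 * written back below.
	 */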
1287 /* seek to current entry */
1288 for (i = 0, rdent = LIST_FIRST(&rnd->rn_dir);
1289 (i < uio->uio_offset) && rdent;
1290 i++, rdent = LIST_NEXT(rdent, rd_entries))
1291 continue;
1292 if (!rdent)
1293 goto out;
1294
1295 /* copy entries */
1296 for (; rdent && uio->uio_resid > 0;
1297 rdent = LIST_NEXT(rdent, rd_entries), i++) {
1298 struct dirent dent;
1299
1300 strlcpy(dent.d_name, rdent->rd_name, sizeof(dent.d_name));
1301 dent.d_namlen = strlen(dent.d_name);
1302 dent.d_reclen = _DIRENT_RECLEN(&dent, dent.d_namlen);
1303
1304 if (__predict_false(RDENT_ISWHITEOUT(rdent))) {
1305 dent.d_fileno = INO_WHITEOUT;
1306 dent.d_type = DT_WHT;
1307 } else {
1308 dent.d_fileno = rdent->rd_node->rn_va.va_fileid;
1309 dent.d_type = vtype2dt(rdent->rd_node->rn_va.va_type);
1310 }
1311
1312 if (uio->uio_resid < dent.d_reclen) {
1313 i--;
1314 break;
1315 }
1316
1317 rv = uiomove(&dent, dent.d_reclen, uio);
1318 if (rv) {
1319 i--;
1320 break;
1321 }
1322 }
1323
1324 out:
1325 if (ap->a_cookies) {
1326 *ap->a_ncookies = 0;
1327 *ap->a_cookies = NULL;
1328 }
1329 if (rdent)
1330 *ap->a_eofflag = 0;
1331 else
1332 *ap->a_eofflag = 1;
1333 uio->uio_offset = i;
1334
1335 return rv;
1336 }
1337
1338 static int
1339 etread(struct rumpfs_node *rn, struct uio *uio)
1340 {
1341 struct rumpuser_iovec iov;
1342 uint8_t *buf;
1343 size_t bufsize;
1344 ssize_t n;
1345 int error = 0;
1346
1347 bufsize = uio->uio_resid;
1348 if (bufsize == 0)
1349 return 0;
1350 buf = kmem_alloc(bufsize, KM_SLEEP);
1351 iov.iov_base = buf;
1352 iov.iov_len = bufsize;
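	/*
	 * uio_offset is relative to the etfs window, so the registered
	 * begin offset (rn_offset) is added for the host-side read.
	 */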
1353 if ((n = rumpuser_iovread(rn->rn_readfd, &iov, 1,
1354 uio->uio_offset + rn->rn_offset, &error)) == -1)
1355 goto out;
1356 KASSERT(n <= bufsize);
1357 error = uiomove(buf, n, uio);
1358
1359 out:
1360 kmem_free(buf, bufsize);
1361 return error;
1362
1363 }
1364
1365 static int
1366 rump_vop_read(void *v)
1367 {
1368 struct vop_read_args /* {
1369 struct vnode *a_vp;
1370 struct uio *a_uio;
1371 int a_ioflag;
1372 kauth_cred_t a_cred;
1373 }; */ *ap = v;
1374 struct vnode *vp = ap->a_vp;
1375 struct rumpfs_node *rn = vp->v_data;
1376 struct uio *uio = ap->a_uio;
1377 const int advice = IO_ADV_DECODE(ap->a_ioflag);
1378 off_t chunk;
1379 int error = 0;
1380
1381 if (vp->v_type == VDIR)
1382 return EISDIR;
1383
1384 /* et op? */
1385 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1386 return etread(rn, uio);
1387
1388 /* otherwise, it's off to ubc with us */
1389 while (uio->uio_resid > 0) {
1390 chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1391 if (chunk == 0)
1392 break;
1393 error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1394 UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1395 if (error)
1396 break;
1397 }
1398
1399 return error;
1400 }
1401
1402 static int
1403 etwrite(struct rumpfs_node *rn, struct uio *uio)
1404 {
1405 struct rumpuser_iovec iov;
1406 uint8_t *buf;
1407 size_t bufsize;
1408 ssize_t n;
1409 int error = 0;
1410
1411 bufsize = uio->uio_resid;
1412 if (bufsize == 0)
1413 return 0;
1414 buf = kmem_alloc(bufsize, KM_SLEEP);
1415 error = uiomove(buf, bufsize, uio);
1416 if (error)
1417 goto out;
1418 KASSERT(uio->uio_resid == 0);
1419 iov.iov_base = buf;
1420 iov.iov_len = bufsize;
1421 n = rumpuser_iovwrite(rn->rn_writefd, &iov, 1,
1422 (uio->uio_offset-bufsize) + rn->rn_offset, &error);
1423 if (n >= 0) {
1424 KASSERT(n <= bufsize);
1425 uio->uio_resid = bufsize - n;
1426 }
1427
1428 out:
1429 kmem_free(buf, bufsize);
1430 return error;
1431 }
1432
1433 static int
1434 rump_vop_write(void *v)
1435 {
1436 struct vop_write_args /* {
1437 struct vnode *a_vp;
1438 struct uio *a_uio;
1439 int a_ioflag;
1440 kauth_cred_t a_cred;
1441 }; */ *ap = v;
1442 struct vnode *vp = ap->a_vp;
1443 struct rumpfs_node *rn = vp->v_data;
1444 struct uio *uio = ap->a_uio;
1445 const int advice = IO_ADV_DECODE(ap->a_ioflag);
1446 void *olddata;
1447 size_t oldlen, newlen;
1448 off_t chunk;
1449 int error = 0;
1450 bool allocd = false;
1451
1452 if (ap->a_ioflag & IO_APPEND)
1453 uio->uio_offset = vp->v_size;
1454
1455 /* consult et? */
1456 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST)
1457 return etwrite(rn, uio);
1458
1459 /*
1460 * Otherwise, it's a case of ubcmove.
1461 */
1462
1463 /*
1464 * First, make sure we have enough storage.
1465 *
1466 * No, you don't need to tell me it's not very efficient.
1467 * No, it doesn't really support sparse files, just fakes it.
1468 */
1469 newlen = uio->uio_offset + uio->uio_resid;
1470 oldlen = 0; /* XXXgcc */
1471 olddata = NULL;
1472 if (rn->rn_dlen < newlen) {
1473 oldlen = rn->rn_dlen;
1474 olddata = rn->rn_data;
1475
1476 rn->rn_data = rump_hypermalloc(newlen, 0, true, "rumpfs");
1477 rn->rn_dlen = newlen;
1478 memset(rn->rn_data, 0, newlen);
1479 memcpy(rn->rn_data, olddata, oldlen);
1480 allocd = true;
1481 uvm_vnp_setsize(vp, newlen);
1482 }
1483
1484 /* ok, we have enough stooorage. write */
1485 while (uio->uio_resid > 0) {
1486 chunk = MIN(uio->uio_resid, (off_t)rn->rn_dlen-uio->uio_offset);
1487 if (chunk == 0)
1488 break;
1489 error = ubc_uiomove(&vp->v_uobj, uio, chunk, advice,
1490 UBC_WRITE | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
1491 if (error)
1492 break;
1493 }
1494
1495 if (allocd) {
1496 if (error) {
1497 rump_hyperfree(rn->rn_data, newlen);
1498 rn->rn_data = olddata;
1499 rn->rn_dlen = oldlen;
1500 uvm_vnp_setsize(vp, oldlen);
1501 } else {
1502 rump_hyperfree(olddata, oldlen);
1503 }
1504 }
1505
1506 return error;
1507 }
1508
1509 static int
1510 rump_vop_bmap(void *v)
1511 {
1512 struct vop_bmap_args /* {
1513 struct vnode *a_vp;
1514 daddr_t a_bn;
1515 struct vnode **a_vpp;
1516 daddr_t *a_bnp;
1517 int *a_runp;
1518 } */ *ap = v;
1519
1520 /* 1:1 mapping */
1521 if (ap->a_vpp)
1522 *ap->a_vpp = ap->a_vp;
1523 if (ap->a_bnp)
1524 *ap->a_bnp = ap->a_bn;
1525 if (ap->a_runp)
1526 *ap->a_runp = 16;
1527
1528 return 0;
1529 }
1530
1531 static int
1532 rump_vop_strategy(void *v)
1533 {
1534 struct vop_strategy_args /* {
1535 struct vnode *a_vp;
1536 struct buf *a_bp;
1537 } */ *ap = v;
1538 struct vnode *vp = ap->a_vp;
1539 struct rumpfs_node *rn = vp->v_data;
1540 struct buf *bp = ap->a_bp;
1541 off_t copylen, copyoff;
1542 int error;
1543
1544 if (vp->v_type != VREG || rn->rn_flags & RUMPNODE_ET_PHONE_HOST) {
1545 error = EINVAL;
1546 goto out;
1547 }
1548
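	/*
	 * b_blkno is in DEV_BSIZE units; shifting by DEV_BSHIFT converts
	 * it to a byte offset into the node's in-memory backing store.
	 */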
1549 copyoff = bp->b_blkno << DEV_BSHIFT;
1550 copylen = MIN(rn->rn_dlen - copyoff, bp->b_bcount);
1551 if (BUF_ISWRITE(bp)) {
1552 memcpy((uint8_t *)rn->rn_data + copyoff, bp->b_data, copylen);
1553 } else {
1554 memset((uint8_t*)bp->b_data + copylen, 0, bp->b_bcount-copylen);
1555 memcpy(bp->b_data, (uint8_t *)rn->rn_data + copyoff, copylen);
1556 }
1557 bp->b_resid = 0;
1558 error = 0;
1559
1560 out:
1561 bp->b_error = error;
1562 biodone(bp);
1563 return 0;
1564 }
1565
1566 static int
1567 rump_vop_pathconf(void *v)
1568 {
1569 struct vop_pathconf_args /* {
1570 struct vnode *a_vp;
1571 int a_name;
1572 register_t *a_retval;
1573 }; */ *ap = v;
1574 int name = ap->a_name;
1575 register_t *retval = ap->a_retval;
1576
1577 switch (name) {
1578 case _PC_LINK_MAX:
1579 *retval = LINK_MAX;
1580 return 0;
1581 case _PC_NAME_MAX:
1582 *retval = RUMPFS_MAXNAMLEN;
1583 return 0;
1584 case _PC_PATH_MAX:
1585 *retval = PATH_MAX;
1586 return 0;
1587 case _PC_PIPE_BUF:
1588 *retval = PIPE_BUF;
1589 return 0;
1590 case _PC_CHOWN_RESTRICTED:
1591 *retval = 1;
1592 return 0;
1593 case _PC_NO_TRUNC:
1594 *retval = 1;
1595 return 0;
1596 case _PC_SYNC_IO:
1597 *retval = 1;
1598 return 0;
1599 case _PC_FILESIZEBITS:
1600 *retval = 43; /* this one goes to 11 */
1601 return 0;
1602 case _PC_SYMLINK_MAX:
1603 *retval = MAXPATHLEN;
1604 return 0;
1605 case _PC_2_SYMLINKS:
1606 *retval = 1;
1607 return 0;
1608 default:
1609 return EINVAL;
1610 }
1611 }
1612
1613 static int
1614 rump_vop_success(void *v)
1615 {
1616
1617 return 0;
1618 }
1619
1620 static int
1621 rump_vop_inactive(void *v)
1622 {
1623 struct vop_inactive_args /* {
1624 struct vnode *a_vp;
1625 bool *a_recycle;
1626 } */ *ap = v;
1627 struct vnode *vp = ap->a_vp;
1628 struct rumpfs_node *rn = vp->v_data;
1629 int error;
1630
1631 if (rn->rn_flags & RUMPNODE_ET_PHONE_HOST && vp->v_type == VREG) {
1632 if (rn->rn_readfd != -1) {
1633 rumpuser_close(rn->rn_readfd, &error);
1634 rn->rn_readfd = -1;
1635 }
1636 if (rn->rn_writefd != -1) {
1637 rumpuser_close(rn->rn_writefd, &error);
1638 rn->rn_writefd = -1;
1639 }
1640 }
1641 *ap->a_recycle = (rn->rn_flags & RUMPNODE_CANRECLAIM) ? true : false;
1642
1643 VOP_UNLOCK(vp);
1644 return 0;
1645 }
1646
1647 static int
1648 rump_vop_reclaim(void *v)
1649 {
1650 struct vop_reclaim_args /* {
1651 struct vnode *a_vp;
1652 } */ *ap = v;
1653 struct vnode *vp = ap->a_vp;
1654 struct rumpfs_node *rn = vp->v_data;
1655
1656 mutex_enter(&reclock);
1657 rn->rn_vp = NULL;
1658 mutex_exit(&reclock);
1659 genfs_node_destroy(vp);
1660 vp->v_data = NULL;
1661
1662 if (rn->rn_flags & RUMPNODE_CANRECLAIM) {
1663 if (vp->v_type == VLNK)
1664 PNBUF_PUT(rn->rn_linktarg);
1665 if (rn->rn_hostpath)
1666 free(rn->rn_hostpath, M_TEMP);
1667 kmem_free(rn, sizeof(*rn));
1668 }
1669
1670 return 0;
1671 }
1672
1673 static int
1674 rump_vop_spec(void *v)
1675 {
1676 struct vop_generic_args *ap = v;
1677 int (**opvec)(void *);
1678
1679 switch (ap->a_desc->vdesc_offset) {
1680 case VOP_ACCESS_DESCOFFSET:
1681 case VOP_GETATTR_DESCOFFSET:
1682 case VOP_SETATTR_DESCOFFSET:
1683 case VOP_LOCK_DESCOFFSET:
1684 case VOP_UNLOCK_DESCOFFSET:
1685 case VOP_ISLOCKED_DESCOFFSET:
1686 case VOP_RECLAIM_DESCOFFSET:
1687 opvec = rump_vnodeop_p;
1688 break;
1689 default:
1690 opvec = spec_vnodeop_p;
1691 break;
1692 }
1693
1694 return VOCALL(opvec, ap->a_desc->vdesc_offset, v);
1695 }
1696
1697 static int
1698 rump_vop_advlock(void *v)
1699 {
1700 struct vop_advlock_args /* {
1701 const struct vnodeop_desc *a_desc;
1702 struct vnode *a_vp;
1703 void *a_id;
1704 int a_op;
1705 struct flock *a_fl;
1706 int a_flags;
1707 } */ *ap = v;
1708 struct vnode *vp = ap->a_vp;
1709 struct rumpfs_node *rn = vp->v_data;
1710
1711 return lf_advlock(ap, &rn->rn_lockf, vp->v_size);
1712 }
1713
1714 /*
1715 * Begin vfs-level stuff
1716 */
1717
1718 VFS_PROTOS(rumpfs);
1719 struct vfsops rumpfs_vfsops = {
1720 .vfs_name = MOUNT_RUMPFS,
1721 .vfs_min_mount_data = 0,
1722 .vfs_mount = rumpfs_mount,
1723 .vfs_start = (void *)nullop,
1724 .vfs_unmount = rumpfs_unmount,
1725 .vfs_root = rumpfs_root,
1726 .vfs_quotactl = (void *)eopnotsupp,
1727 .vfs_statvfs = genfs_statvfs,
1728 .vfs_sync = (void *)nullop,
1729 .vfs_vget = rumpfs_vget,
1730 .vfs_fhtovp = (void *)eopnotsupp,
1731 .vfs_vptofh = (void *)eopnotsupp,
1732 .vfs_init = rumpfs_init,
1733 .vfs_reinit = NULL,
1734 .vfs_done = rumpfs_done,
1735 .vfs_mountroot = rumpfs_mountroot,
1736 .vfs_snapshot = (void *)eopnotsupp,
1737 .vfs_extattrctl = (void *)eopnotsupp,
1738 .vfs_suspendctl = (void *)eopnotsupp,
1739 .vfs_renamelock_enter = genfs_renamelock_enter,
1740 .vfs_renamelock_exit = genfs_renamelock_exit,
1741 .vfs_opv_descs = rump_opv_descs,
1742 /* vfs_refcount */
1743 /* vfs_list */
1744 };
1745
1746 static int
1747 rumpfs_mountfs(struct mount *mp)
1748 {
1749 struct rumpfs_mount *rfsmp;
1750 struct rumpfs_node *rn;
1751 int error;
1752
1753 rfsmp = kmem_alloc(sizeof(*rfsmp), KM_SLEEP);
1754
1755 rn = makeprivate(VDIR, NODEV, DEV_BSIZE, false);
1756 rn->rn_parent = rn;
1757 if ((error = makevnode(mp, rn, &rfsmp->rfsmp_rvp)) != 0)
1758 return error;
1759
1760 rfsmp->rfsmp_rvp->v_vflag |= VV_ROOT;
1761 VOP_UNLOCK(rfsmp->rfsmp_rvp);
1762
1763 mp->mnt_data = rfsmp;
1764 mp->mnt_stat.f_namemax = RUMPFS_MAXNAMLEN;
1765 mp->mnt_stat.f_iosize = 512;
1766 mp->mnt_flag |= MNT_LOCAL;
1767 mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO;
1768 mp->mnt_fs_bshift = DEV_BSHIFT;
1769 vfs_getnewfsid(mp);
1770
1771 return 0;
1772 }
1773
1774 int
1775 rumpfs_mount(struct mount *mp, const char *mntpath, void *arg, size_t *alen)
1776 {
1777 int error;
1778
1779 if (mp->mnt_flag & MNT_UPDATE) {
1780 return 0;
1781 }
1782
1783 error = set_statvfs_info(mntpath, UIO_USERSPACE, "rumpfs", UIO_SYSSPACE,
1784 mp->mnt_op->vfs_name, mp, curlwp);
1785 if (error)
1786 return error;
1787
1788 return rumpfs_mountfs(mp);
1789 }
1790
1791 int
1792 rumpfs_unmount(struct mount *mp, int mntflags)
1793 {
1794 struct rumpfs_mount *rfsmp = mp->mnt_data;
1795 int flags = 0, error;
1796
1797 if (panicstr || mntflags & MNT_FORCE)
1798 flags |= FORCECLOSE;
1799
1800 if ((error = vflush(mp, rfsmp->rfsmp_rvp, flags)) != 0)
1801 return error;
1802 vgone(rfsmp->rfsmp_rvp); /* XXX */
1803
1804 kmem_free(rfsmp, sizeof(*rfsmp));
1805
1806 return 0;
1807 }
1808
1809 int
1810 rumpfs_root(struct mount *mp, struct vnode **vpp)
1811 {
1812 struct rumpfs_mount *rfsmp = mp->mnt_data;
1813
1814 vref(rfsmp->rfsmp_rvp);
1815 vn_lock(rfsmp->rfsmp_rvp, LK_EXCLUSIVE | LK_RETRY);
1816 *vpp = rfsmp->rfsmp_rvp;
1817 return 0;
1818 }
1819
1820 int
1821 rumpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1822 {
1823
1824 return EOPNOTSUPP;
1825 }
1826
1827 void
1828 rumpfs_init()
1829 {
1830
1831 CTASSERT(RUMP_ETFS_SIZE_ENDOFF == RUMPBLK_SIZENOTSET);
1832
1833 mutex_init(&reclock, MUTEX_DEFAULT, IPL_NONE);
1834 mutex_init(&etfs_lock, MUTEX_DEFAULT, IPL_NONE);
1835 }
1836
1837 void
1838 rumpfs_done()
1839 {
1840
1841 mutex_destroy(&reclock);
1842 mutex_destroy(&etfs_lock);
1843 }
1844
1845 int
1846 rumpfs_mountroot()
1847 {
1848 struct mount *mp;
1849 int error;
1850
1851 if ((error = vfs_rootmountalloc(MOUNT_RUMPFS, "rootdev", &mp)) != 0) {
1852 vrele(rootvp);
1853 return error;
1854 }
1855
1856 if ((error = rumpfs_mountfs(mp)) != 0)
1857 panic("mounting rootfs failed: %d", error);
1858
1859 mutex_enter(&mountlist_lock);
1860 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1861 mutex_exit(&mountlist_lock);
1862
1863 error = set_statvfs_info("/", UIO_SYSSPACE, "rumpfs", UIO_SYSSPACE,
1864 mp->mnt_op->vfs_name, mp, curlwp);
1865 if (error)
1866 panic("set_statvfs_info failed for rootfs: %d", error);
1867
1868 mp->mnt_flag &= ~MNT_RDONLY;
1869 vfs_unbusy(mp, false, NULL);
1870
1871 return 0;
1872 }
1873