Home | History | Annotate | Line # | Download | only in libukfs
ukfs.c revision 1.45
      1 /*	$NetBSD: ukfs.c,v 1.45 2009/12/11 21:20:52 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2007, 2008, 2009  Antti Kantee.  All Rights Reserved.
      5  *
      6  * Development of this software was supported by the
      7  * Finnish Cultural Foundation.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     28  * SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * This library enables access to files systems directly without
     33  * involving system calls.
     34  */
     35 
     36 #ifdef __linux__
     37 #define _XOPEN_SOURCE 500
     38 #define _BSD_SOURCE
     39 #define _FILE_OFFSET_BITS 64
     40 #endif
     41 
     42 #include <sys/param.h>
     43 #include <sys/queue.h>
     44 #include <sys/stat.h>
     45 #include <sys/sysctl.h>
     46 #include <sys/mount.h>
     47 
     48 #include <assert.h>
     49 #include <dirent.h>
     50 #include <dlfcn.h>
     51 #include <err.h>
     52 #include <errno.h>
     53 #include <fcntl.h>
     54 #include <pthread.h>
     55 #include <stdio.h>
     56 #include <stdlib.h>
     57 #include <string.h>
     58 #include <unistd.h>
     59 #include <stdint.h>
     60 
     61 #include <rump/ukfs.h>
     62 
     63 #include <rump/rump.h>
     64 #include <rump/rump_syscalls.h>
     65 #include <rump/rumpuser.h>
     66 
     67 #include "ukfs_int_disklabel.h"
     68 
     69 #define UKFS_MODE_DEFAULT 0555
     70 
     71 struct ukfs {
     72 	struct mount *ukfs_mp;
     73 	struct vnode *ukfs_rvp;
     74 	void *ukfs_specific;
     75 
     76 	pthread_spinlock_t ukfs_spin;
     77 	pid_t ukfs_nextpid;
     78 	struct vnode *ukfs_cdir;
     79 	int ukfs_devfd;
     80 	char *ukfs_devpath;
     81 	char *ukfs_mountpath;
     82 	struct ukfs_part *ukfs_part;
     83 };
     84 
     85 static int builddirs(const char *, mode_t,
     86     int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *);
     87 
     88 struct mount *
     89 ukfs_getmp(struct ukfs *ukfs)
     90 {
     91 
     92 	return ukfs->ukfs_mp;
     93 }
     94 
     95 struct vnode *
     96 ukfs_getrvp(struct ukfs *ukfs)
     97 {
     98 	struct vnode *rvp;
     99 
    100 	rvp = ukfs->ukfs_rvp;
    101 	rump_pub_vp_incref(rvp);
    102 
    103 	return rvp;
    104 }
    105 
    106 void
    107 ukfs_setspecific(struct ukfs *ukfs, void *priv)
    108 {
    109 
    110 	ukfs->ukfs_specific = priv;
    111 }
    112 
    113 void *
    114 ukfs_getspecific(struct ukfs *ukfs)
    115 {
    116 
    117 	return ukfs->ukfs_specific;
    118 }
    119 
    120 #ifdef DONT_WANT_PTHREAD_LINKAGE
    121 #define pthread_spin_lock(a)
    122 #define pthread_spin_unlock(a)
    123 #define pthread_spin_init(a,b)
    124 #define pthread_spin_destroy(a)
    125 #endif
    126 
    127 static pid_t
    128 nextpid(struct ukfs *ukfs)
    129 {
    130 	pid_t npid;
    131 
    132 	pthread_spin_lock(&ukfs->ukfs_spin);
    133 	if (ukfs->ukfs_nextpid == 0)
    134 		ukfs->ukfs_nextpid++;
    135 	npid = ukfs->ukfs_nextpid++;
    136 	pthread_spin_unlock(&ukfs->ukfs_spin);
    137 
    138 	return npid;
    139 }
    140 
    141 static void
    142 precall(struct ukfs *ukfs)
    143 {
    144 	struct vnode *rvp, *cvp;
    145 
    146 	rump_pub_lwp_alloc_and_switch(nextpid(ukfs), 1);
    147 	rvp = ukfs_getrvp(ukfs);
    148 	pthread_spin_lock(&ukfs->ukfs_spin);
    149 	cvp = ukfs->ukfs_cdir;
    150 	pthread_spin_unlock(&ukfs->ukfs_spin);
    151 	rump_pub_rcvp_set(rvp, cvp); /* takes refs */
    152 	rump_pub_vp_rele(rvp);
    153 }
    154 
    155 static void
    156 postcall(struct ukfs *ukfs)
    157 {
    158 	struct vnode *rvp;
    159 
    160 	rvp = ukfs_getrvp(ukfs);
    161 	rump_pub_rcvp_set(NULL, rvp);
    162 	rump_pub_vp_rele(rvp);
    163 	rump_pub_lwp_release(rump_pub_lwp_curlwp());
    164 }
    165 
    166 struct ukfs_part {
    167 	int part_type;
    168 	char part_labelchar;
    169 	off_t part_devoff;
    170 	off_t part_devsize;
    171 };
    172 
    173 enum ukfs_parttype { UKFS_PART_NONE, UKFS_PART_DISKLABEL, UKFS_PART_OFFSET };
    174 
    175 static struct ukfs_part ukfs__part_none = {
    176 	.part_type = UKFS_PART_NONE,
    177 	.part_devoff = 0,
    178 	.part_devsize = RUMP_ETFS_SIZE_ENDOFF,
    179 };
    180 static struct ukfs_part ukfs__part_na;
    181 struct ukfs_part *ukfs_part_none = &ukfs__part_none;
    182 struct ukfs_part *ukfs_part_na = &ukfs__part_na;
    183 
    184 #define PART2LOCKSIZE(len) ((len) == RUMP_ETFS_SIZE_ENDOFF ? 0 : (len))
    185 
    186 int
    187 _ukfs_init(int version)
    188 {
    189 	int rv;
    190 
    191 	if (version != UKFS_VERSION) {
    192 		printf("incompatible ukfs version, %d vs. %d\n",
    193 		    version, UKFS_VERSION);
    194 		errno = EPROGMISMATCH;
    195 		return -1;
    196 	}
    197 
    198 	if ((rv = rump_init()) != 0) {
    199 		errno = rv;
    200 		return -1;
    201 	}
    202 
    203 	return 0;
    204 }
    205 
    206 /*ARGSUSED*/
    207 static int
    208 rumpmkdir(struct ukfs *dummy, const char *path, mode_t mode)
    209 {
    210 
    211 	return rump_sys_mkdir(path, mode);
    212 }
    213 
    214 int
    215 ukfs_part_probe(char *devpath, struct ukfs_part **partp)
    216 {
    217 	struct ukfs_part *part;
    218 	char *p;
    219 	int error = 0;
    220 	int devfd = -1;
    221 
    222 	if ((p = strstr(devpath, UKFS_PARTITION_SCANMAGIC)) != NULL) {
    223 		fprintf(stderr, "ukfs: %%PART is deprecated.  use "
    224 		    "%%DISKLABEL instead\n");
    225 		errno = ENODEV;
    226 		return -1;
    227 	}
    228 
    229 	part = malloc(sizeof(*part));
    230 	if (part == NULL) {
    231 		errno = ENOMEM;
    232 		return -1;
    233 	}
    234 	part->part_type = UKFS_PART_NONE;
    235 
    236 	/*
    237 	 * Check for magic in pathname:
    238 	 *   disklabel: /regularpath%DISKLABEL:labelchar%\0
    239 	 *     offsets: /regularpath%OFFSET:start,end%\0
    240 	 */
    241 #define MAGICADJ_DISKLABEL(p, n) (p+sizeof(UKFS_DISKLABEL_SCANMAGIC)-1+n)
    242 	if ((p = strstr(devpath, UKFS_DISKLABEL_SCANMAGIC)) != NULL
    243 	    && strlen(p) == UKFS_DISKLABEL_MAGICLEN
    244 	    && *(MAGICADJ_DISKLABEL(p,1)) == '%') {
    245 		if (*(MAGICADJ_DISKLABEL(p,0)) >= 'a' &&
    246 		    *(MAGICADJ_DISKLABEL(p,0)) < 'a' + UKFS_MAXPARTITIONS) {
    247 			struct ukfs__disklabel dl;
    248 			struct ukfs__partition *pp;
    249 			char buf[65536];
    250 			char labelchar = *(MAGICADJ_DISKLABEL(p,0));
    251 			int partition = labelchar - 'a';
    252 
    253 			*p = '\0';
    254 			devfd = open(devpath, O_RDONLY);
    255 			if (devfd == -1) {
    256 				error = errno;
    257 				goto out;
    258 			}
    259 
    260 			/* Locate the disklabel and find the partition. */
    261 			if (pread(devfd, buf, sizeof(buf), 0) == -1) {
    262 				error = errno;
    263 				goto out;
    264 			}
    265 
    266 			if (ukfs__disklabel_scan(&dl, buf, sizeof(buf)) != 0) {
    267 				error = ENOENT;
    268 				goto out;
    269 			}
    270 
    271 			if (dl.d_npartitions < partition) {
    272 				error = ENOENT;
    273 				goto out;
    274 			}
    275 
    276 			pp = &dl.d_partitions[partition];
    277 			part->part_type = UKFS_PART_DISKLABEL;
    278 			part->part_labelchar = labelchar;
    279 			part->part_devoff = pp->p_offset << DEV_BSHIFT;
    280 			part->part_devsize = pp->p_size << DEV_BSHIFT;
    281 		} else {
    282 			error = EINVAL;
    283 		}
    284 #define MAGICADJ_OFFSET(p, n) (p+sizeof(UKFS_OFFSET_SCANMAGIC)-1+n)
    285 	} else if (((p = strstr(devpath, UKFS_OFFSET_SCANMAGIC)) != NULL)
    286 	    && (strlen(p) >= UKFS_OFFSET_MINLEN)) {
    287 		char *comma, *pers, *ep, *nptr;
    288 		u_quad_t val;
    289 
    290 		comma = strchr(p, ',');
    291 		if (comma == NULL) {
    292 			error = EINVAL;
    293 			goto out;
    294 		}
    295 		pers = strchr(comma, '%');
    296 		if (pers == NULL) {
    297 			error = EINVAL;
    298 			goto out;
    299 		}
    300 		*comma = '\0';
    301 		*pers = '\0';
    302 		*p = '\0';
    303 
    304 		nptr = MAGICADJ_OFFSET(p,0);
    305 		/* check if string is negative */
    306 		if (*nptr == '-') {
    307 			error = ERANGE;
    308 			goto out;
    309 		}
    310 		val = strtouq(nptr, &ep, 10);
    311 		if (val == UQUAD_MAX) {
    312 			error = ERANGE;
    313 			goto out;
    314 		}
    315 		if (*ep != '\0') {
    316 			error = EADDRNOTAVAIL; /* creative ;) */
    317 			goto out;
    318 		}
    319 		part->part_devoff = val;
    320 
    321 		/* omstart */
    322 
    323 		nptr = comma+1;
    324 		/* check if string is negative */
    325 		if (*nptr == '-') {
    326 			error = ERANGE;
    327 			goto out;
    328 		}
    329 		val = strtouq(nptr, &ep, 10);
    330 		if (val == UQUAD_MAX) {
    331 			error = ERANGE;
    332 			goto out;
    333 		}
    334 		if (*ep != '\0') {
    335 			error = EADDRNOTAVAIL; /* creative ;) */
    336 			goto out;
    337 		}
    338 		part->part_devsize = val;
    339 		part->part_type = UKFS_PART_OFFSET;
    340 	} else {
    341 		free(part);
    342 		part = ukfs_part_none;
    343 	}
    344 
    345  out:
    346 	if (devfd != -1)
    347 		close(devfd);
    348 	if (error) {
    349 		free(part);
    350 		errno = error;
    351 	} else {
    352 		*partp = part;
    353 	}
    354 
    355 	return error ? -1 : 0;
    356 }
    357 
    358 int
    359 ukfs_part_tostring(struct ukfs_part *part, char *str, size_t strsize)
    360 {
    361 	int rv;
    362 
    363 	*str = '\0';
    364 	/* "pseudo" values */
    365 	if (part == ukfs_part_na) {
    366 		errno = EINVAL;
    367 		return -1;
    368 	}
    369 	if (part == ukfs_part_none)
    370 		return 0;
    371 
    372 	rv = 0;
    373 	switch (part->part_type) {
    374 	case UKFS_PART_NONE:
    375 		break;
    376 
    377 	case UKFS_PART_DISKLABEL:
    378 		snprintf(str, strsize, "%%DISKLABEL:%c%%",part->part_labelchar);
    379 		rv = 1;
    380 		break;
    381 
    382 	case UKFS_PART_OFFSET:
    383 		snprintf(str, strsize, "[%llu,%llu]",
    384 		    (unsigned long long)part->part_devoff,
    385 		    (unsigned long long)(part->part_devoff+part->part_devsize));
    386 		rv = 1;
    387 		break;
    388 	}
    389 
    390 	return rv;
    391 }
    392 
    393 static void
    394 unlockdev(int fd, struct ukfs_part *part)
    395 {
    396 	struct flock flarg;
    397 
    398 	memset(&flarg, 0, sizeof(flarg));
    399 	flarg.l_type = F_UNLCK;
    400 	flarg.l_whence = SEEK_SET;
    401 	flarg.l_start = part->part_devoff;
    402 	flarg.l_len = PART2LOCKSIZE(part->part_devsize);
    403 	if (fcntl(fd, F_SETLK, &flarg) == -1)
    404 		warn("ukfs: cannot unlock device file");
    405 }
    406 
    407 /*
    408  * Open the disk file and flock it.  Also, if we are operation on
    409  * an embedded partition, find the partition offset and size from
    410  * the disklabel.
    411  *
    412  * We hard-fail only in two cases:
    413  *  1) we failed to get the partition info out (don't know what offset
    414  *     to mount from)
    415  *  2) we failed to flock the source device (i.e. fcntl() fails,
    416  *     not e.g. open() before it)
    417  *
    418  * Otherwise we let the code proceed to mount and let the file system
    419  * throw the proper error.  The only questionable bit is that if we
    420  * soft-fail before flock and mount does succeed...
    421  *
    422  * Returns: -1 error (errno reports error code)
    423  *           0 success
    424  *
    425  * dfdp: -1  device is not open
    426  *        n  device is open
    427  */
    428 static int
    429 process_diskdevice(const char *devpath, struct ukfs_part *part, int rdonly,
    430 	int *dfdp)
    431 {
    432 	struct stat sb;
    433 	int rv = 0, devfd;
    434 
    435 	/* defaults */
    436 	*dfdp = -1;
    437 
    438 	devfd = open(devpath, rdonly ? O_RDONLY : O_RDWR);
    439 	if (devfd == -1) {
    440 		rv = errno;
    441 		goto out;
    442 	}
    443 
    444 	if (fstat(devfd, &sb) == -1) {
    445 		rv = errno;
    446 		goto out;
    447 	}
    448 
    449 	/*
    450 	 * We do this only for non-block device since the
    451 	 * (NetBSD) kernel allows block device open only once.
    452 	 * We also need to close the device for fairly obvious reasons.
    453 	 */
    454 	if (!S_ISBLK(sb.st_mode)) {
    455 		struct flock flarg;
    456 
    457 		memset(&flarg, 0, sizeof(flarg));
    458 		flarg.l_type = rdonly ? F_RDLCK : F_WRLCK;
    459 		flarg.l_whence = SEEK_SET;
    460 		flarg.l_start = part->part_devoff;
    461 		flarg.l_len = PART2LOCKSIZE(part->part_devsize);
    462 		if (fcntl(devfd, F_SETLK, &flarg) == -1) {
    463 			pid_t holder;
    464 			int sverrno;
    465 
    466 			sverrno = errno;
    467 			if (fcntl(devfd, F_GETLK, &flarg) != 1)
    468 				holder = flarg.l_pid;
    469 			else
    470 				holder = -1;
    471 			warnx("ukfs_mount: cannot lock device.  held by pid %d",
    472 			    holder);
    473 			rv = sverrno;
    474 			goto out;
    475 		}
    476 	} else {
    477 		close(devfd);
    478 		devfd = -1;
    479 	}
    480 	*dfdp = devfd;
    481 
    482  out:
    483 	if (rv) {
    484 		if (devfd != -1)
    485 			close(devfd);
    486 	}
    487 
    488 	return rv;
    489 }
    490 
    491 static struct ukfs *
    492 doukfsmount(const char *vfsname, const char *devpath, struct ukfs_part *part,
    493 	const char *mountpath, int mntflags, void *arg, size_t alen)
    494 {
    495 	struct ukfs *fs = NULL;
    496 	int rv = 0, devfd = -1;
    497 	int mounted = 0;
    498 	int regged = 0;
    499 
    500 	if (part != ukfs_part_na) {
    501 		if ((rv = process_diskdevice(devpath, part,
    502 		    mntflags & MNT_RDONLY, &devfd)) != 0)
    503 			goto out;
    504 	}
    505 
    506 	fs = malloc(sizeof(struct ukfs));
    507 	if (fs == NULL) {
    508 		rv = ENOMEM;
    509 		goto out;
    510 	}
    511 	memset(fs, 0, sizeof(struct ukfs));
    512 
    513 	/* create our mountpoint.  this is never removed. */
    514 	if (builddirs(mountpath, 0777, rumpmkdir, NULL) == -1) {
    515 		if (errno != EEXIST) {
    516 			rv = errno;
    517 			goto out;
    518 		}
    519 	}
    520 
    521 	if (part != ukfs_part_na) {
    522 		/* LINTED */
    523 		rv = rump_pub_etfs_register_withsize(devpath, devpath,
    524 		    RUMP_ETFS_BLK, part->part_devoff, part->part_devsize);
    525 		if (rv) {
    526 			goto out;
    527 		}
    528 		regged = 1;
    529 	}
    530 
    531 	rv = rump_sys_mount(vfsname, mountpath, mntflags, arg, alen);
    532 	if (rv) {
    533 		rv = errno;
    534 		goto out;
    535 	}
    536 	mounted = 1;
    537 	rv = rump_pub_vfs_getmp(mountpath, &fs->ukfs_mp);
    538 	if (rv) {
    539 		goto out;
    540 	}
    541 	rv = rump_pub_vfs_root(fs->ukfs_mp, &fs->ukfs_rvp, 0);
    542 	if (rv) {
    543 		goto out;
    544 	}
    545 
    546 	if (regged) {
    547 		fs->ukfs_devpath = strdup(devpath);
    548 	}
    549 	fs->ukfs_mountpath = strdup(mountpath);
    550 	fs->ukfs_cdir = ukfs_getrvp(fs);
    551 	pthread_spin_init(&fs->ukfs_spin, PTHREAD_PROCESS_SHARED);
    552 	fs->ukfs_devfd = devfd;
    553 	fs->ukfs_part = part;
    554 	assert(rv == 0);
    555 
    556  out:
    557 	if (rv) {
    558 		if (fs) {
    559 			if (fs->ukfs_rvp)
    560 				rump_pub_vp_rele(fs->ukfs_rvp);
    561 			free(fs);
    562 			fs = NULL;
    563 		}
    564 		if (mounted)
    565 			rump_sys_unmount(mountpath, MNT_FORCE);
    566 		if (regged)
    567 			rump_pub_etfs_remove(devpath);
    568 		if (devfd != -1) {
    569 			unlockdev(fs->ukfs_devfd, fs->ukfs_part);
    570 			close(devfd);
    571 		}
    572 		ukfs_part_release(part);
    573 		errno = rv;
    574 	}
    575 
    576 	return fs;
    577 }
    578 
    579 struct ukfs *
    580 ukfs_mount(const char *vfsname, const char *devpath,
    581 	const char *mountpath, int mntflags, void *arg, size_t alen)
    582 {
    583 
    584 	return doukfsmount(vfsname, devpath, ukfs_part_na,
    585 	    mountpath, mntflags, arg, alen);
    586 }
    587 
    588 struct ukfs *
    589 ukfs_mount_disk(const char *vfsname, const char *devpath,
    590 	struct ukfs_part *part, const char *mountpath, int mntflags,
    591 	void *arg, size_t alen)
    592 {
    593 
    594 	return doukfsmount(vfsname, devpath, part,
    595 	    mountpath, mntflags, arg, alen);
    596 }
    597 
    598 int
    599 ukfs_release(struct ukfs *fs, int flags)
    600 {
    601 
    602 	if ((flags & UKFS_RELFLAG_NOUNMOUNT) == 0) {
    603 		int rv, mntflag, error;
    604 
    605 		ukfs_chdir(fs, "/");
    606 		mntflag = 0;
    607 		if (flags & UKFS_RELFLAG_FORCE)
    608 			mntflag = MNT_FORCE;
    609 		rump_pub_lwp_alloc_and_switch(nextpid(fs), 1);
    610 		rump_pub_vp_rele(fs->ukfs_rvp);
    611 		fs->ukfs_rvp = NULL;
    612 		rv = rump_sys_unmount(fs->ukfs_mountpath, mntflag);
    613 		if (rv == -1) {
    614 			error = errno;
    615 			rump_pub_vfs_root(fs->ukfs_mp, &fs->ukfs_rvp, 0);
    616 			rump_pub_lwp_release(rump_pub_lwp_curlwp());
    617 			ukfs_chdir(fs, fs->ukfs_mountpath);
    618 			errno = error;
    619 			return -1;
    620 		}
    621 		rump_pub_lwp_release(rump_pub_lwp_curlwp());
    622 	}
    623 
    624 	ukfs_part_release(fs->ukfs_part);
    625 	if (fs->ukfs_devpath) {
    626 		rump_pub_etfs_remove(fs->ukfs_devpath);
    627 		free(fs->ukfs_devpath);
    628 	}
    629 	free(fs->ukfs_mountpath);
    630 
    631 	pthread_spin_destroy(&fs->ukfs_spin);
    632 	if (fs->ukfs_devfd != -1) {
    633 		unlockdev(fs->ukfs_devfd, fs->ukfs_part);
    634 		close(fs->ukfs_devfd);
    635 	}
    636 	free(fs);
    637 
    638 	return 0;
    639 }
    640 
    641 void
    642 ukfs_part_release(struct ukfs_part *part)
    643 {
    644 
    645 	if (part != ukfs_part_none && part != ukfs_part_na)
    646 		free(part);
    647 }
    648 
    649 #define STDCALL(ukfs, thecall)						\
    650 	int rv = 0;							\
    651 									\
    652 	precall(ukfs);							\
    653 	rv = thecall;							\
    654 	postcall(ukfs);							\
    655 	return rv;
    656 
    657 int
    658 ukfs_opendir(struct ukfs *ukfs, const char *dirname, struct ukfs_dircookie **c)
    659 {
    660 	struct vnode *vp;
    661 	int rv;
    662 
    663 	precall(ukfs);
    664 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
    665 	    NULL, &vp, NULL);
    666 	postcall(ukfs);
    667 
    668 	if (rv == 0) {
    669 		RUMP_VOP_UNLOCK(vp, 0);
    670 	} else {
    671 		errno = rv;
    672 		rv = -1;
    673 	}
    674 
    675 	/*LINTED*/
    676 	*c = (struct ukfs_dircookie *)vp;
    677 	return rv;
    678 }
    679 
    680 static int
    681 getmydents(struct vnode *vp, off_t *off, uint8_t *buf, size_t bufsize)
    682 {
    683 	struct uio *uio;
    684 	size_t resid;
    685 	int rv, eofflag;
    686 	kauth_cred_t cred;
    687 
    688 	uio = rump_pub_uio_setup(buf, bufsize, *off, RUMPUIO_READ);
    689 	cred = rump_pub_cred_suserget();
    690 	rv = RUMP_VOP_READDIR(vp, uio, cred, &eofflag, NULL, NULL);
    691 	rump_pub_cred_put(cred);
    692 	RUMP_VOP_UNLOCK(vp, 0);
    693 	*off = rump_pub_uio_getoff(uio);
    694 	resid = rump_pub_uio_free(uio);
    695 
    696 	if (rv) {
    697 		errno = rv;
    698 		return -1;
    699 	}
    700 
    701 	/* LINTED: not totally correct return type, but follows syscall */
    702 	return bufsize - resid;
    703 }
    704 
    705 /*ARGSUSED*/
    706 int
    707 ukfs_getdents_cookie(struct ukfs *ukfs, struct ukfs_dircookie *c, off_t *off,
    708 	uint8_t *buf, size_t bufsize)
    709 {
    710 	/*LINTED*/
    711 	struct vnode *vp = (struct vnode *)c;
    712 
    713 	RUMP_VOP_LOCK(vp, RUMP_LK_SHARED);
    714 	return getmydents(vp, off, buf, bufsize);
    715 }
    716 
    717 int
    718 ukfs_getdents(struct ukfs *ukfs, const char *dirname, off_t *off,
    719 	uint8_t *buf, size_t bufsize)
    720 {
    721 	struct vnode *vp;
    722 	int rv;
    723 
    724 	precall(ukfs);
    725 	rv = rump_pub_namei(RUMP_NAMEI_LOOKUP, RUMP_NAMEI_LOCKLEAF, dirname,
    726 	    NULL, &vp, NULL);
    727 	postcall(ukfs);
    728 	if (rv) {
    729 		errno = rv;
    730 		return -1;
    731 	}
    732 
    733 	rv = getmydents(vp, off, buf, bufsize);
    734 	rump_pub_vp_rele(vp);
    735 	return rv;
    736 }
    737 
    738 /*ARGSUSED*/
    739 int
    740 ukfs_closedir(struct ukfs *ukfs, struct ukfs_dircookie *c)
    741 {
    742 
    743 	/*LINTED*/
    744 	rump_pub_vp_rele((struct vnode *)c);
    745 	return 0;
    746 }
    747 
    748 int
    749 ukfs_open(struct ukfs *ukfs, const char *filename, int flags)
    750 {
    751 	int fd;
    752 
    753 	precall(ukfs);
    754 	fd = rump_sys_open(filename, flags, 0);
    755 	postcall(ukfs);
    756 	if (fd == -1)
    757 		return -1;
    758 
    759 	return fd;
    760 }
    761 
    762 ssize_t
    763 ukfs_read(struct ukfs *ukfs, const char *filename, off_t off,
    764 	uint8_t *buf, size_t bufsize)
    765 {
    766 	int fd;
    767 	ssize_t xfer = -1; /* XXXgcc */
    768 
    769 	precall(ukfs);
    770 	fd = rump_sys_open(filename, RUMP_O_RDONLY, 0);
    771 	if (fd == -1)
    772 		goto out;
    773 
    774 	xfer = rump_sys_pread(fd, buf, bufsize, off);
    775 	rump_sys_close(fd);
    776 
    777  out:
    778 	postcall(ukfs);
    779 	if (fd == -1) {
    780 		return -1;
    781 	}
    782 	return xfer;
    783 }
    784 
    785 /*ARGSUSED*/
    786 ssize_t
    787 ukfs_read_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen)
    788 {
    789 
    790 	return rump_sys_pread(fd, buf, buflen, off);
    791 }
    792 
    793 ssize_t
    794 ukfs_write(struct ukfs *ukfs, const char *filename, off_t off,
    795 	uint8_t *buf, size_t bufsize)
    796 {
    797 	int fd;
    798 	ssize_t xfer = -1; /* XXXgcc */
    799 
    800 	precall(ukfs);
    801 	fd = rump_sys_open(filename, RUMP_O_WRONLY, 0);
    802 	if (fd == -1)
    803 		goto out;
    804 
    805 	/* write and commit */
    806 	xfer = rump_sys_pwrite(fd, buf, bufsize, off);
    807 	if (xfer > 0)
    808 		rump_sys_fsync(fd);
    809 
    810 	rump_sys_close(fd);
    811 
    812  out:
    813 	postcall(ukfs);
    814 	if (fd == -1) {
    815 		return -1;
    816 	}
    817 	return xfer;
    818 }
    819 
    820 /*ARGSUSED*/
    821 ssize_t
    822 ukfs_write_fd(struct ukfs *ukfs, int fd, off_t off, uint8_t *buf, size_t buflen,
    823 	int dosync)
    824 {
    825 	ssize_t xfer;
    826 
    827 	xfer = rump_sys_pwrite(fd, buf, buflen, off);
    828 	if (xfer > 0 && dosync)
    829 		rump_sys_fsync(fd);
    830 
    831 	return xfer;
    832 }
    833 
    834 /*ARGSUSED*/
    835 int
    836 ukfs_close(struct ukfs *ukfs, int fd)
    837 {
    838 
    839 	rump_sys_close(fd);
    840 	return 0;
    841 }
    842 
    843 int
    844 ukfs_create(struct ukfs *ukfs, const char *filename, mode_t mode)
    845 {
    846 	int fd;
    847 
    848 	precall(ukfs);
    849 	fd = rump_sys_open(filename, RUMP_O_WRONLY | RUMP_O_CREAT, mode);
    850 	if (fd == -1)
    851 		return -1;
    852 	rump_sys_close(fd);
    853 
    854 	postcall(ukfs);
    855 	return 0;
    856 }
    857 
    858 int
    859 ukfs_mknod(struct ukfs *ukfs, const char *path, mode_t mode, dev_t dev)
    860 {
    861 
    862 	STDCALL(ukfs, rump_sys_mknod(path, mode, dev));
    863 }
    864 
    865 int
    866 ukfs_mkfifo(struct ukfs *ukfs, const char *path, mode_t mode)
    867 {
    868 
    869 	STDCALL(ukfs, rump_sys_mkfifo(path, mode));
    870 }
    871 
    872 int
    873 ukfs_mkdir(struct ukfs *ukfs, const char *filename, mode_t mode)
    874 {
    875 
    876 	STDCALL(ukfs, rump_sys_mkdir(filename, mode));
    877 }
    878 
    879 int
    880 ukfs_remove(struct ukfs *ukfs, const char *filename)
    881 {
    882 
    883 	STDCALL(ukfs, rump_sys_unlink(filename));
    884 }
    885 
    886 int
    887 ukfs_rmdir(struct ukfs *ukfs, const char *filename)
    888 {
    889 
    890 	STDCALL(ukfs, rump_sys_rmdir(filename));
    891 }
    892 
    893 int
    894 ukfs_link(struct ukfs *ukfs, const char *filename, const char *f_create)
    895 {
    896 
    897 	STDCALL(ukfs, rump_sys_link(filename, f_create));
    898 }
    899 
    900 int
    901 ukfs_symlink(struct ukfs *ukfs, const char *filename, const char *linkname)
    902 {
    903 
    904 	STDCALL(ukfs, rump_sys_symlink(filename, linkname));
    905 }
    906 
    907 ssize_t
    908 ukfs_readlink(struct ukfs *ukfs, const char *filename,
    909 	char *linkbuf, size_t buflen)
    910 {
    911 	ssize_t rv;
    912 
    913 	precall(ukfs);
    914 	rv = rump_sys_readlink(filename, linkbuf, buflen);
    915 	postcall(ukfs);
    916 	return rv;
    917 }
    918 
    919 int
    920 ukfs_rename(struct ukfs *ukfs, const char *from, const char *to)
    921 {
    922 
    923 	STDCALL(ukfs, rump_sys_rename(from, to));
    924 }
    925 
    926 int
    927 ukfs_chdir(struct ukfs *ukfs, const char *path)
    928 {
    929 	struct vnode *newvp, *oldvp;
    930 	int rv;
    931 
    932 	precall(ukfs);
    933 	rv = rump_sys_chdir(path);
    934 	if (rv == -1)
    935 		goto out;
    936 
    937 	newvp = rump_pub_cdir_get();
    938 	pthread_spin_lock(&ukfs->ukfs_spin);
    939 	oldvp = ukfs->ukfs_cdir;
    940 	ukfs->ukfs_cdir = newvp;
    941 	pthread_spin_unlock(&ukfs->ukfs_spin);
    942 	if (oldvp)
    943 		rump_pub_vp_rele(oldvp);
    944 
    945  out:
    946 	postcall(ukfs);
    947 	return rv;
    948 }
    949 
    950 /*
    951  * If we want to use post-time_t file systems on pre-time_t hosts,
    952  * we must translate the stat structure.  Since we don't currently
    953  * have a general method for making compat calls in rump, special-case
    954  * this one.
    955  *
    956  * Note that this does not allow making system calls to older rump
    957  * kernels from newer hosts.
    958  */
    959 #define VERS_TIMECHANGE 599000700
    960 
    961 static int
    962 needcompat(void)
    963 {
    964 
    965 #ifdef __NetBSD__
    966 	/*LINTED*/
    967 	return __NetBSD_Version__ < VERS_TIMECHANGE
    968 	    && rump_pub_getversion() >= VERS_TIMECHANGE;
    969 #else
    970 	return 0;
    971 #endif
    972 }
    973 
    974 int
    975 ukfs_stat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
    976 {
    977 	int rv;
    978 
    979 	precall(ukfs);
    980 	if (needcompat())
    981 		rv = rump_pub_sys___stat30(filename, file_stat);
    982 	else
    983 		rv = rump_sys_stat(filename, file_stat);
    984 	postcall(ukfs);
    985 
    986 	return rv;
    987 }
    988 
    989 int
    990 ukfs_lstat(struct ukfs *ukfs, const char *filename, struct stat *file_stat)
    991 {
    992 	int rv;
    993 
    994 	precall(ukfs);
    995 	if (needcompat())
    996 		rv = rump_pub_sys___lstat30(filename, file_stat);
    997 	else
    998 		rv = rump_sys_lstat(filename, file_stat);
    999 	postcall(ukfs);
   1000 
   1001 	return rv;
   1002 }
   1003 
   1004 int
   1005 ukfs_chmod(struct ukfs *ukfs, const char *filename, mode_t mode)
   1006 {
   1007 
   1008 	STDCALL(ukfs, rump_sys_chmod(filename, mode));
   1009 }
   1010 
   1011 int
   1012 ukfs_lchmod(struct ukfs *ukfs, const char *filename, mode_t mode)
   1013 {
   1014 
   1015 	STDCALL(ukfs, rump_sys_lchmod(filename, mode));
   1016 }
   1017 
   1018 int
   1019 ukfs_chown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
   1020 {
   1021 
   1022 	STDCALL(ukfs, rump_sys_chown(filename, uid, gid));
   1023 }
   1024 
   1025 int
   1026 ukfs_lchown(struct ukfs *ukfs, const char *filename, uid_t uid, gid_t gid)
   1027 {
   1028 
   1029 	STDCALL(ukfs, rump_sys_lchown(filename, uid, gid));
   1030 }
   1031 
   1032 int
   1033 ukfs_chflags(struct ukfs *ukfs, const char *filename, u_long flags)
   1034 {
   1035 
   1036 	STDCALL(ukfs, rump_sys_chflags(filename, flags));
   1037 }
   1038 
   1039 int
   1040 ukfs_lchflags(struct ukfs *ukfs, const char *filename, u_long flags)
   1041 {
   1042 
   1043 	STDCALL(ukfs, rump_sys_lchflags(filename, flags));
   1044 }
   1045 
   1046 int
   1047 ukfs_utimes(struct ukfs *ukfs, const char *filename, const struct timeval *tptr)
   1048 {
   1049 
   1050 	STDCALL(ukfs, rump_sys_utimes(filename, tptr));
   1051 }
   1052 
   1053 int
   1054 ukfs_lutimes(struct ukfs *ukfs, const char *filename,
   1055 	      const struct timeval *tptr)
   1056 {
   1057 
   1058 	STDCALL(ukfs, rump_sys_lutimes(filename, tptr));
   1059 }
   1060 
   1061 /*
   1062  * Dynamic module support
   1063  */
   1064 
   1065 /* load one library */
   1066 
   1067 /*
   1068  * XXX: the dlerror stuff isn't really threadsafe, but then again I
   1069  * can't protect against other threads calling dl*() outside of ukfs,
   1070  * so just live with it being flimsy
   1071  */
   1072 int
   1073 ukfs_modload(const char *fname)
   1074 {
   1075 	void *handle;
   1076 	struct modinfo **mi;
   1077 	int error;
   1078 
   1079 	handle = dlopen(fname, RTLD_LAZY|RTLD_GLOBAL);
   1080 	if (handle == NULL) {
   1081 		const char *dlmsg = dlerror();
   1082 		if (strstr(dlmsg, "Undefined symbol"))
   1083 			return 0;
   1084 		warnx("dlopen %s failed: %s\n", fname, dlmsg);
   1085 		/* XXXerrno */
   1086 		return -1;
   1087 	}
   1088 
   1089 	mi = dlsym(handle, "__start_link_set_modules");
   1090 	if (mi) {
   1091 		error = rump_pub_module_init(*mi, NULL);
   1092 		if (error)
   1093 			goto errclose;
   1094 		return 1;
   1095 	}
   1096 	error = EINVAL;
   1097 
   1098  errclose:
   1099 	dlclose(handle);
   1100 	errno = error;
   1101 	return -1;
   1102 }
   1103 
   1104 struct loadfail {
   1105 	char *pname;
   1106 
   1107 	LIST_ENTRY(loadfail) entries;
   1108 };
   1109 
   1110 #define RUMPFSMOD_PREFIX "librumpfs_"
   1111 #define RUMPFSMOD_SUFFIX ".so"
   1112 
   1113 int
   1114 ukfs_modload_dir(const char *dir)
   1115 {
   1116 	char nbuf[MAXPATHLEN+1], *p;
   1117 	struct dirent entry, *result;
   1118 	DIR *libdir;
   1119 	struct loadfail *lf, *nlf;
   1120 	int error, nloaded = 0, redo;
   1121 	LIST_HEAD(, loadfail) lfs;
   1122 
   1123 	libdir = opendir(dir);
   1124 	if (libdir == NULL)
   1125 		return -1;
   1126 
   1127 	LIST_INIT(&lfs);
   1128 	for (;;) {
   1129 		if ((error = readdir_r(libdir, &entry, &result)) != 0)
   1130 			break;
   1131 		if (!result)
   1132 			break;
   1133 		if (strncmp(result->d_name, RUMPFSMOD_PREFIX,
   1134 		    strlen(RUMPFSMOD_PREFIX)) != 0)
   1135 			continue;
   1136 		if (((p = strstr(result->d_name, RUMPFSMOD_SUFFIX)) == NULL)
   1137 		    || strlen(p) != strlen(RUMPFSMOD_SUFFIX))
   1138 			continue;
   1139 		strlcpy(nbuf, dir, sizeof(nbuf));
   1140 		strlcat(nbuf, "/", sizeof(nbuf));
   1141 		strlcat(nbuf, result->d_name, sizeof(nbuf));
   1142 		switch (ukfs_modload(nbuf)) {
   1143 		case 0:
   1144 			lf = malloc(sizeof(*lf));
   1145 			if (lf == NULL) {
   1146 				error = ENOMEM;
   1147 				break;
   1148 			}
   1149 			lf->pname = strdup(nbuf);
   1150 			if (lf->pname == NULL) {
   1151 				free(lf);
   1152 				error = ENOMEM;
   1153 				break;
   1154 			}
   1155 			LIST_INSERT_HEAD(&lfs, lf, entries);
   1156 			break;
   1157 		case 1:
   1158 			nloaded++;
   1159 			break;
   1160 		default:
   1161 			/* ignore errors */
   1162 			break;
   1163 		}
   1164 	}
   1165 	closedir(libdir);
   1166 	if (error && nloaded != 0)
   1167 		error = 0;
   1168 
   1169 	/*
   1170 	 * El-cheapo dependency calculator.  Just try to load the
   1171 	 * modules n times in a loop
   1172 	 */
   1173 	for (redo = 1; redo;) {
   1174 		redo = 0;
   1175 		nlf = LIST_FIRST(&lfs);
   1176 		while ((lf = nlf) != NULL) {
   1177 			nlf = LIST_NEXT(lf, entries);
   1178 			if (ukfs_modload(lf->pname) == 1) {
   1179 				nloaded++;
   1180 				redo = 1;
   1181 				LIST_REMOVE(lf, entries);
   1182 				free(lf->pname);
   1183 				free(lf);
   1184 			}
   1185 		}
   1186 	}
   1187 
   1188 	while ((lf = LIST_FIRST(&lfs)) != NULL) {
   1189 		LIST_REMOVE(lf, entries);
   1190 		free(lf->pname);
   1191 		free(lf);
   1192 	}
   1193 
   1194 	if (error && nloaded == 0) {
   1195 		errno = error;
   1196 		return -1;
   1197 	}
   1198 
   1199 	return nloaded;
   1200 }
   1201 
   1202 /* XXX: this code uses definitions from NetBSD, needs rumpdefs */
   1203 ssize_t
   1204 ukfs_vfstypes(char *buf, size_t buflen)
   1205 {
   1206 	int mib[3];
   1207 	struct sysctlnode q, ans[128];
   1208 	size_t alen;
   1209 	int i;
   1210 
   1211 	mib[0] = CTL_VFS;
   1212 	mib[1] = VFS_GENERIC;
   1213 	mib[2] = CTL_QUERY;
   1214 	alen = sizeof(ans);
   1215 
   1216 	memset(&q, 0, sizeof(q));
   1217 	q.sysctl_flags = SYSCTL_VERSION;
   1218 
   1219 	if (rump_sys___sysctl(mib, 3, ans, &alen, &q, sizeof(q)) == -1) {
   1220 		return -1;
   1221 	}
   1222 
   1223 	for (i = 0; i < alen/sizeof(ans[0]); i++)
   1224 		if (strcmp("fstypes", ans[i].sysctl_name) == 0)
   1225 			break;
   1226 	if (i == alen/sizeof(ans[0])) {
   1227 		errno = ENXIO;
   1228 		return -1;
   1229 	}
   1230 
   1231 	mib[0] = CTL_VFS;
   1232 	mib[1] = VFS_GENERIC;
   1233 	mib[2] = ans[i].sysctl_num;
   1234 
   1235 	if (rump_sys___sysctl(mib, 3, buf, &buflen, NULL, 0) == -1) {
   1236 		return -1;
   1237 	}
   1238 
   1239 	return buflen;
   1240 }
   1241 
   1242 /*
   1243  * Utilities
   1244  */
   1245 static int
   1246 builddirs(const char *pathname, mode_t mode,
   1247 	int (*mkdirfn)(struct ukfs *, const char *, mode_t), struct ukfs *fs)
   1248 {
   1249 	char *f1, *f2;
   1250 	int rv;
   1251 	mode_t mask;
   1252 	bool end;
   1253 
   1254 	/*ukfs_umask((mask = ukfs_umask(0)));*/
   1255 	umask((mask = umask(0)));
   1256 
   1257 	f1 = f2 = strdup(pathname);
   1258 	if (f1 == NULL) {
   1259 		errno = ENOMEM;
   1260 		return -1;
   1261 	}
   1262 
   1263 	end = false;
   1264 	for (;;) {
   1265 		/* find next component */
   1266 		f2 += strspn(f2, "/");
   1267 		f2 += strcspn(f2, "/");
   1268 		if (*f2 == '\0')
   1269 			end = true;
   1270 		else
   1271 			*f2 = '\0';
   1272 
   1273 		rv = mkdirfn(fs, f1, mode & ~mask);
   1274 		if (errno == EEXIST)
   1275 			rv = 0;
   1276 
   1277 		if (rv == -1 || *f2 != '\0' || end)
   1278 			break;
   1279 
   1280 		*f2 = '/';
   1281 	}
   1282 
   1283 	free(f1);
   1284 
   1285 	return rv;
   1286 }
   1287 
   1288 int
   1289 ukfs_util_builddirs(struct ukfs *ukfs, const char *pathname, mode_t mode)
   1290 {
   1291 
   1292 	return builddirs(pathname, mode, ukfs_mkdir, ukfs);
   1293 }
   1294