Home | History | Annotate | Line # | Download | only in librumphijack
hijack.c revision 1.16.2.1
      1 /*      $NetBSD: hijack.c,v 1.16.2.1 2011/01/20 14:24:53 bouyer Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __RCSID("$NetBSD: hijack.c,v 1.16.2.1 2011/01/20 14:24:53 bouyer Exp $");
     30 
     31 #include <sys/param.h>
     32 #include <sys/types.h>
     33 #include <sys/event.h>
     34 #include <sys/ioctl.h>
     35 #include <sys/socket.h>
     36 #include <sys/poll.h>
     37 
     38 #include <rump/rumpclient.h>
     39 #include <rump/rump_syscalls.h>
     40 
     41 #include <assert.h>
     42 #include <dlfcn.h>
     43 #include <err.h>
     44 #include <errno.h>
     45 #include <fcntl.h>
     46 #include <poll.h>
     47 #include <pthread.h>
     48 #include <signal.h>
     49 #include <stdarg.h>
     50 #include <stdbool.h>
     51 #include <stdio.h>
     52 #include <stdlib.h>
     53 #include <time.h>
     54 #include <unistd.h>
     55 
/*
 * Indices of the rump kernel system calls this library dispatches to.
 * The order MUST match sysnames[] below: rcinit() resolves sysnames[i]
 * and stores the result in rumpcalls[i].
 */
enum {
	RUMPCALL_SOCKET,
	RUMPCALL_ACCEPT,
	RUMPCALL_BIND,
	RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME,
	RUMPCALL_GETSOCKNAME,
	RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM,
	RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO,
	RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT,
	RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,
	RUMPCALL_READ,
	RUMPCALL_READV,
	RUMPCALL_WRITE,
	RUMPCALL_WRITEV,
	RUMPCALL_IOCTL,
	RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,
	RUMPCALL_QUOTACTL,
	RUMPCALL_MOUNT,
	RUMPCALL_GETVFSSTAT,
	RUMPCALL__NUM		/* number of entries, not a call */
};

/*
 * Produce the symbol name of a rump syscall as a string literal: paste
 * the call name onto RUMP_SYS_RENAME_ and stringify the expansion.
 */
#define RSYS_STRING(a) __STRING(a)
#define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))

/*
 * Symbol names looked up in librumpclient, indexed by RUMPCALL_*.
 * File-local and read-only: this library is preloaded into arbitrary
 * programs, so an exported "sysnames" could be interposed by (or clash
 * with) a symbol of the same name in the hijacked program.
 */
static const char * const sysnames[] = {
	RSYS_NAME(SOCKET),
	RSYS_NAME(ACCEPT),
	RSYS_NAME(BIND),
	RSYS_NAME(CONNECT),
	RSYS_NAME(GETPEERNAME),
	RSYS_NAME(GETSOCKNAME),
	RSYS_NAME(LISTEN),
	RSYS_NAME(RECVFROM),
	RSYS_NAME(RECVMSG),
	RSYS_NAME(SENDTO),
	RSYS_NAME(SENDMSG),
	RSYS_NAME(GETSOCKOPT),
	RSYS_NAME(SETSOCKOPT),
	RSYS_NAME(SHUTDOWN),
	RSYS_NAME(READ),
	RSYS_NAME(READV),
	RSYS_NAME(WRITE),
	RSYS_NAME(WRITEV),
	RSYS_NAME(IOCTL),
	RSYS_NAME(FCNTL),
	RSYS_NAME(CLOSE),
	RSYS_NAME(POLLTS),
	RSYS_NAME(QUOTACTL),
	RSYS_NAME(MOUNT),
	RSYS_NAME(GETVFSSTAT),
};

/* Fails to compile (negative array size) if the two lists get out of sync. */
typedef char sysnames_size_check[
    (sizeof(sysnames) / sizeof(sysnames[0]) == (size_t)RUMPCALL__NUM) ? 1 : -1];
    103 
    104 static int	(*host_socket)(int, int, int);
    105 static int	(*host_connect)(int, const struct sockaddr *, socklen_t);
    106 static int	(*host_bind)(int, const struct sockaddr *, socklen_t);
    107 static int	(*host_listen)(int, int);
    108 static int	(*host_accept)(int, struct sockaddr *, socklen_t *);
    109 static int	(*host_getpeername)(int, struct sockaddr *, socklen_t *);
    110 static int	(*host_getsockname)(int, struct sockaddr *, socklen_t *);
    111 static int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
    112 
    113 static ssize_t	(*host_read)(int, void *, size_t);
    114 static ssize_t	(*host_readv)(int, const struct iovec *, int);
    115 static ssize_t	(*host_write)(int, const void *, size_t);
    116 static ssize_t	(*host_writev)(int, const struct iovec *, int);
    117 static int	(*host_ioctl)(int, unsigned long, ...);
    118 static int	(*host_fcntl)(int, int, ...);
    119 static int	(*host_close)(int);
    120 static int	(*host_pollts)(struct pollfd *, nfds_t,
    121 			       const struct timespec *, const sigset_t *);
    122 static pid_t	(*host_fork)(void);
    123 static int	(*host_dup2)(int, int);
    124 static int	(*host_shutdown)(int, int);
    125 /* XXX */
    126 static void	*host_sendto;
    127 static void	*host_recvfrom;
    128 
    129 static void *rumpcalls[RUMPCALL__NUM];
    130 
    131 /*
    132  * Would be nice to get this automatically in sync with libc.
    133  * Also, this does not work for compat-using binaries!
    134  */
    135 
    136 #if !__NetBSD_Prereq__(5,99,7)
    137 #define SELECT select
    138 #define POLLTS pollts
    139 #define POLL poll
    140 #else
    141 #define SELECT __select50
    142 #define POLLTS __pollts50
    143 #define POLL __poll50
    144 
    145 int SELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
    146 int POLLTS(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *);
    147 int POLL(struct pollfd *, nfds_t, int);
    148 #endif
    149 
/*
 * Symbol lookup hook that rcinit() stores through librumpclient's
 * "rumpclient_dlsym" pointer.  librumpclient calls it in the LD_PRELOAD
 * case; doing the dlsym() from inside this library ensures the correct
 * RTLD_NEXT.
 *
 * Returns whatever dlsym() returns for (handle, symbol).
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
    160 
    161 /* low calorie sockets? */
    162 static bool hostlocalsockets = true;
    163 
    164 static void __attribute__((constructor))
    165 rcinit(void)
    166 {
    167 	int (*rumpcinit)(void);
    168 	void **rumpcdlsym;
    169 	void *hand;
    170 	int i;
    171 
    172 	hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
    173 	if (!hand)
    174 		err(1, "cannot open librumpclient.so");
    175 	rumpcinit = dlsym(hand, "rumpclient_init");
    176 	_DIAGASSERT(rumpcinit);
    177 
    178 	rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
    179 	*rumpcdlsym = hijackdlsym;
    180 
    181 	host_socket = dlsym(RTLD_NEXT, "__socket30");
    182 	host_listen = dlsym(RTLD_NEXT, "listen");
    183 	host_connect = dlsym(RTLD_NEXT, "connect");
    184 	host_bind = dlsym(RTLD_NEXT, "bind");
    185 	host_accept = dlsym(RTLD_NEXT, "accept");
    186 	host_getpeername = dlsym(RTLD_NEXT, "getpeername");
    187 	host_getsockname = dlsym(RTLD_NEXT, "getsockname");
    188 	host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
    189 
    190 	host_read = dlsym(RTLD_NEXT, "read");
    191 	host_readv = dlsym(RTLD_NEXT, "readv");
    192 	host_write = dlsym(RTLD_NEXT, "write");
    193 	host_writev = dlsym(RTLD_NEXT, "writev");
    194 	host_ioctl = dlsym(RTLD_NEXT, "ioctl");
    195 	host_fcntl = dlsym(RTLD_NEXT, "fcntl");
    196 	host_close = dlsym(RTLD_NEXT, "close");
    197 	host_pollts = dlsym(RTLD_NEXT, "pollts");
    198 	host_fork = dlsym(RTLD_NEXT, "fork");
    199 	host_dup2 = dlsym(RTLD_NEXT, "dup2");
    200 	host_shutdown = dlsym(RTLD_NEXT, "shutdown");
    201 	host_sendto = dlsym(RTLD_NEXT, "sendto");
    202 	host_recvfrom = dlsym(RTLD_NEXT, "recvfrom");
    203 
    204 	for (i = 0; i < RUMPCALL__NUM; i++) {
    205 		rumpcalls[i] = dlsym(hand, sysnames[i]);
    206 		if (!rumpcalls[i]) {
    207 			fprintf(stderr, "rumphijack: cannot find symbol: %s\n",
    208 			    sysnames[i]);
    209 			exit(1);
    210 		}
    211 	}
    212 
    213 	if (rumpcinit() == -1)
    214 		err(1, "rumpclient init");
    215 }
    216 
/*
 * Bitmask of descriptor numbers that dup2() has aliased onto a rump
 * kernel descriptor: bit n set means fd n is served by the rump kernel.
 *
 * ISDUP2D() is range-checked: shifting by a negative count or by the
 * width of the mask (or more) is undefined, and on common hardware the
 * count wraps so that e.g. fd 33 would alias bit 1.  Descriptors that
 * cannot be represented in the mask are never dup2'd ones.
 */
static unsigned dup2mask;
#define ISDUP2D(fd)							\
	((fd) >= 0 && (fd) < (int)(8 * sizeof(dup2mask)) &&		\
	    ((1u << (fd)) & dup2mask) != 0)
    219 
/*
 * Debug tracing.  DPRINTF((fmt, ...)) takes a parenthesized printf-style
 * argument list; it prints to stderr when DEBUGJACK is defined and
 * expands to nothing (arguments are not evaluated) otherwise.
 */
//#define DEBUGJACK
#ifdef DEBUGJACK
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	/* stay silent once stderr has been dup2'd from a rump descriptor */
	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#else /* !DEBUGJACK */
#define DPRINTF(x)
#endif /* DEBUGJACK */
    239 
/* XXX: need runtime selection.  low for now due to FD_SETSIZE */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */

/*
 * Convert a descriptor number returned by the rump kernel into the
 * number handed to the application.  -1 and dup2'd descriptors are
 * returned unchanged; everything else is shifted up by HIJACK_FDOFF.
 */
static int
fd_rump2host(int fd)
{

	if (fd == -1 || ISDUP2D(fd))
		return fd;

	return fd + HIJACK_FDOFF;
}

/*
 * Inverse of fd_rump2host() for descriptors coming from the application:
 * dup2'd descriptors keep their number, all others have HIJACK_FDOFF
 * subtracted.  The caller is expected to have checked fd_isrump().
 */
static int
fd_host2rump(int fd)
{

	return ISDUP2D(fd) ? fd : fd - HIJACK_FDOFF;
}

/*
 * Tell whether an application-visible descriptor belongs to the rump
 * kernel: either it is at or above HIJACK_FDOFF or it was dup2'd.
 */
static bool
fd_isrump(int fd)
{

	if (fd >= HIJACK_FDOFF)
		return true;

	return ISDUP2D(fd) != 0;
}

/* abort (via assert) unless _fd_ is a rump kernel descriptor */
#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
#undef HIJACK_FDOFF
    275 
    276 int
    277 __quotactl50(const char * mnt, struct plistref *p)
    278 {
    279 	int (*qctl)(const char *, struct plistref *);
    280 	int error;
    281 
    282 	qctl = rumpcalls[RUMPCALL_QUOTACTL];
    283 	error = qctl(mnt, p);
    284 	DPRINTF(("quotactl <- %d\n", error));
    285 	return error;
    286 }
    287 
    288 int
    289 __mount50(const char *type, const char *dir, int flags, void *data,
    290     size_t data_len)
    291 {
    292 	int (*domount)(const char *, const char *, int, void *, size_t);
    293 	int error;
    294 
    295 	domount = rumpcalls[RUMPCALL_MOUNT];
    296 	error = domount(type, dir, flags, data, data_len);
    297 	DPRINTF(("mount <- %d\n", error));
    298 	return error;
    299 }
    300 
    301 int
    302 getvfsstat(struct statvfs *buf, size_t bufsize, int flags)
    303 {
    304 	int (*dogetvfsstat)(struct statvfs *, size_t, int);
    305 	int error;
    306 
    307 	dogetvfsstat = rumpcalls[RUMPCALL_GETVFSSTAT];
    308 	error = dogetvfsstat(buf, bufsize, flags);
    309 	DPRINTF(("getvfsstat <- %d\n", error));
    310 	return error;
    311 }
    312 
    313 int __socket30(int, int, int);
    314 int
    315 __socket30(int domain, int type, int protocol)
    316 {
    317 	int (*rc_socket)(int, int, int);
    318 	int fd;
    319 	bool dohost;
    320 
    321 	dohost = hostlocalsockets && (domain == AF_LOCAL);
    322 
    323 	if (dohost)
    324 		rc_socket = host_socket;
    325 	else
    326 		rc_socket = rumpcalls[RUMPCALL_SOCKET];
    327 	fd = rc_socket(domain, type, protocol);
    328 
    329 	if (!dohost)
    330 		fd = fd_rump2host(fd);
    331 	DPRINTF(("socket <- %d\n", fd));
    332 
    333 	return fd;
    334 }
    335 
    336 int
    337 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
    338 {
    339 	int (*rc_accept)(int, struct sockaddr *, socklen_t *);
    340 	int fd;
    341 	bool isrump;
    342 
    343 	isrump = fd_isrump(s);
    344 
    345 	DPRINTF(("accept -> %d", s));
    346 	if (isrump) {
    347 		rc_accept = rumpcalls[RUMPCALL_ACCEPT];
    348 		s = fd_host2rump(s);
    349 	} else {
    350 		rc_accept = host_accept;
    351 	}
    352 	fd = rc_accept(s, addr, addrlen);
    353 	if (fd != -1 && isrump)
    354 		fd = fd_rump2host(fd);
    355 
    356 	DPRINTF((" <- %d\n", fd));
    357 
    358 	return fd;
    359 }
    360 
    361 int
    362 bind(int s, const struct sockaddr *name, socklen_t namelen)
    363 {
    364 	int (*rc_bind)(int, const struct sockaddr *, socklen_t);
    365 
    366 	DPRINTF(("bind -> %d\n", s));
    367 	if (fd_isrump(s)) {
    368 		rc_bind = rumpcalls[RUMPCALL_BIND];
    369 		s = fd_host2rump(s);
    370 	} else {
    371 		rc_bind = host_bind;
    372 	}
    373 	return rc_bind(s, name, namelen);
    374 }
    375 
    376 int
    377 connect(int s, const struct sockaddr *name, socklen_t namelen)
    378 {
    379 	int (*rc_connect)(int, const struct sockaddr *, socklen_t);
    380 
    381 	DPRINTF(("connect -> %d\n", s));
    382 	if (fd_isrump(s)) {
    383 		rc_connect = rumpcalls[RUMPCALL_CONNECT];
    384 		s = fd_host2rump(s);
    385 	} else {
    386 		rc_connect = host_connect;
    387 	}
    388 
    389 	return rc_connect(s, name, namelen);
    390 }
    391 
    392 int
    393 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
    394 {
    395 	int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
    396 
    397 	DPRINTF(("getpeername -> %d\n", s));
    398 	if (fd_isrump(s)) {
    399 		rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
    400 		s = fd_host2rump(s);
    401 	} else {
    402 		rc_getpeername = host_getpeername;
    403 	}
    404 	return rc_getpeername(s, name, namelen);
    405 }
    406 
    407 int
    408 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
    409 {
    410 	int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
    411 
    412 	DPRINTF(("getsockname -> %d\n", s));
    413 	if (fd_isrump(s)) {
    414 		rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
    415 		s = fd_host2rump(s);
    416 	} else {
    417 		rc_getsockname = host_getsockname;
    418 	}
    419 	return rc_getsockname(s, name, namelen);
    420 }
    421 
    422 int
    423 listen(int s, int backlog)
    424 {
    425 	int (*rc_listen)(int, int);
    426 
    427 	DPRINTF(("listen -> %d\n", s));
    428 	if (fd_isrump(s)) {
    429 		rc_listen = rumpcalls[RUMPCALL_LISTEN];
    430 		s = fd_host2rump(s);
    431 	} else {
    432 		rc_listen = host_listen;
    433 	}
    434 	return rc_listen(s, backlog);
    435 }
    436 
/*
 * recv wrapper: expressed as recvfrom() without a source address, so the
 * host/rump dispatch happens in recvfrom().
 */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{
	struct sockaddr *nofrom = NULL;
	socklen_t *nofromlen = NULL;

	return recvfrom(s, buf, len, flags, nofrom, nofromlen);
}
    443 
    444 ssize_t
    445 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
    446 	socklen_t *fromlen)
    447 {
    448 	int (*rc_recvfrom)(int, void *, size_t, int,
    449 	    struct sockaddr *, socklen_t *);
    450 
    451 	DPRINTF(("recvfrom\n"));
    452 	if (fd_isrump(s)) {
    453 		rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
    454 		s = fd_host2rump(s);
    455 	} else {
    456 		rc_recvfrom = host_recvfrom;
    457 	}
    458 
    459 	return rc_recvfrom(s, buf, len, flags, from, fromlen);
    460 }
    461 
    462 ssize_t
    463 recvmsg(int s, struct msghdr *msg, int flags)
    464 {
    465 	int (*rc_recvmsg)(int, struct msghdr *, int);
    466 
    467 	DPRINTF(("recvmsg\n"));
    468 	assertfd(s);
    469 	rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
    470 	return rc_recvmsg(fd_host2rump(s), msg, flags);
    471 }
    472 
/*
 * send wrapper: expressed as sendto() without a destination address, so
 * the host/rump dispatch happens in sendto().
 */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{
	const struct sockaddr *noaddr = NULL;

	return sendto(s, buf, len, flags, noaddr, 0);
}
    479 
    480 ssize_t
    481 sendto(int s, const void *buf, size_t len, int flags,
    482 	const struct sockaddr *to, socklen_t tolen)
    483 {
    484 	int (*rc_sendto)(int, const void *, size_t, int,
    485 	    const struct sockaddr *, socklen_t);
    486 
    487 	if (s == -1)
    488 		return len;
    489 	DPRINTF(("sendto\n"));
    490 
    491 	if (fd_isrump(s)) {
    492 		rc_sendto = rumpcalls[RUMPCALL_SENDTO];
    493 		s = fd_host2rump(s);
    494 	} else {
    495 		rc_sendto = host_sendto;
    496 	}
    497 	return rc_sendto(s, buf, len, flags, to, tolen);
    498 }
    499 
    500 ssize_t
    501 sendmsg(int s, const struct msghdr *msg, int flags)
    502 {
    503 	int (*rc_sendmsg)(int, const struct msghdr *, int);
    504 
    505 	DPRINTF(("sendmsg\n"));
    506 	assertfd(s);
    507 	rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
    508 	return rc_sendmsg(fd_host2rump(s), msg, flags);
    509 }
    510 
    511 int
    512 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
    513 {
    514 	int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
    515 
    516 	DPRINTF(("getsockopt -> %d\n", s));
    517 	assertfd(s);
    518 	rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
    519 	return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
    520 }
    521 
    522 int
    523 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
    524 {
    525 	int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
    526 
    527 	DPRINTF(("setsockopt -> %d\n", s));
    528 	if (fd_isrump(s)) {
    529 		rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
    530 		s = fd_host2rump(s);
    531 	} else {
    532 		rc_setsockopt = host_setsockopt;
    533 	}
    534 	return rc_setsockopt(s, level, optname, optval, optlen);
    535 }
    536 
    537 int
    538 shutdown(int s, int how)
    539 {
    540 	int (*rc_shutdown)(int, int);
    541 
    542 	DPRINTF(("shutdown -> %d\n", s));
    543 	if (fd_isrump(s)) {
    544 		rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
    545 		s = fd_host2rump(s);
    546 	} else {
    547 		rc_shutdown = host_shutdown;
    548 	}
    549 	return rc_shutdown(s, how);
    550 }
    551 
    552 /*
    553  * dup2 is special.  we allow dup2 of a rump kernel fd to 0-2 since
    554  * many programs do that.  dup2 of a rump kernel fd to another value
    555  * not >= fdoff is an error.
    556  *
    557  * Note: cannot rump2host newd, because it is often hardcoded.
    558  *
    559  * XXX: should disable debug prints after stdout/stderr are dup2'd
    560  */
    561 int
    562 dup2(int oldd, int newd)
    563 {
    564 	int rv;
    565 
    566 	DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
    567 
    568 	if (fd_isrump(oldd)) {
    569 		if (!(newd >= 0 && newd <= 2))
    570 			return EBADF;
    571 		oldd = fd_host2rump(oldd);
    572 		rv = rump_sys_dup2(oldd, newd);
    573 		if (rv != -1)
    574 			dup2mask |= 1<<newd;
    575 	} else {
    576 		rv = host_dup2(oldd, newd);
    577 	}
    578 
    579 	return rv;
    580 }
    581 
/*
 * We wrap fork in the appropriate rump client calls to preserve the
 * file descriptors of the forked parent in the child, but prevent
 * double use of the connection fd.
 *
 * Returns what the host fork returned, except that -1 is returned when
 * rumpclient_prefork() fails (no fork is attempted) or, in the child,
 * when rumpclient_fork_init() fails.
 */

pid_t
fork()
{
	struct rumpclient_fork *rf;
	pid_t pid;

	DPRINTF(("fork\n"));

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	pid = host_fork();
	if (pid == 0) {
		/* child */
		if (rumpclient_fork_init(rf) == -1)
			pid = -1;
	}
	/* XXX: rf is not cancelled when the host fork fails (pid == -1) */

	DPRINTF(("fork returns %d\n", pid));
	return pid;
}
    614 
    615 /*
    616  * Hybrids
    617  */
    618 
    619 ssize_t
    620 read(int fd, void *buf, size_t len)
    621 {
    622 	ssize_t (*op_read)(int, void *, size_t);
    623 	ssize_t n;
    624 
    625 	DPRINTF(("read %d\n", fd));
    626 	if (fd_isrump(fd)) {
    627 		fd = fd_host2rump(fd);
    628 		op_read = rumpcalls[RUMPCALL_READ];
    629 	} else {
    630 		op_read = host_read;
    631 	}
    632 
    633 	n = op_read(fd, buf, len);
    634 	return n;
    635 }
    636 
    637 ssize_t
    638 readv(int fd, const struct iovec *iov, int iovcnt)
    639 {
    640 	ssize_t (*op_readv)(int, const struct iovec *, int);
    641 
    642 	DPRINTF(("readv %d\n", fd));
    643 	if (fd_isrump(fd)) {
    644 		fd = fd_host2rump(fd);
    645 		op_readv = rumpcalls[RUMPCALL_READV];
    646 	} else {
    647 		op_readv = host_readv;
    648 	}
    649 
    650 	return op_readv(fd, iov, iovcnt);
    651 }
    652 
    653 ssize_t
    654 write(int fd, const void *buf, size_t len)
    655 {
    656 	ssize_t (*op_write)(int, const void *, size_t);
    657 
    658 	if (fd_isrump(fd)) {
    659 		fd = fd_host2rump(fd);
    660 		op_write = rumpcalls[RUMPCALL_WRITE];
    661 	} else {
    662 		op_write = host_write;
    663 	}
    664 
    665 	return op_write(fd, buf, len);
    666 }
    667 
    668 ssize_t
    669 writev(int fd, const struct iovec *iov, int iovcnt)
    670 {
    671 	ssize_t (*op_writev)(int, const struct iovec *, int);
    672 
    673 	DPRINTF(("writev %d\n", fd));
    674 	if (fd_isrump(fd)) {
    675 		fd = fd_host2rump(fd);
    676 		op_writev = rumpcalls[RUMPCALL_WRITEV];
    677 	} else {
    678 		op_writev = host_writev;
    679 	}
    680 
    681 	return op_writev(fd, iov, iovcnt);
    682 }
    683 
    684 int
    685 ioctl(int fd, unsigned long cmd, ...)
    686 {
    687 	int (*op_ioctl)(int, unsigned long cmd, ...);
    688 	va_list ap;
    689 	int rv;
    690 
    691 	DPRINTF(("ioctl\n"));
    692 	if (fd_isrump(fd)) {
    693 		fd = fd_host2rump(fd);
    694 		op_ioctl = rumpcalls[RUMPCALL_IOCTL];
    695 	} else {
    696 		op_ioctl = host_ioctl;
    697 	}
    698 
    699 	va_start(ap, cmd);
    700 	rv = op_ioctl(fd, cmd, va_arg(ap, void *));
    701 	va_end(ap);
    702 	return rv;
    703 }
    704 
    705 int
    706 fcntl(int fd, int cmd, ...)
    707 {
    708 	int (*op_fcntl)(int, int, ...);
    709 	va_list ap;
    710 	int rv;
    711 
    712 	DPRINTF(("fcntl\n"));
    713 	if (fd_isrump(fd)) {
    714 		fd = fd_host2rump(fd);
    715 		op_fcntl = rumpcalls[RUMPCALL_FCNTL];
    716 	} else {
    717 		op_fcntl = host_fcntl;
    718 	}
    719 
    720 	va_start(ap, cmd);
    721 	rv = op_fcntl(fd, cmd, va_arg(ap, void *));
    722 	va_end(ap);
    723 	return rv;
    724 }
    725 
    726 int
    727 close(int fd)
    728 {
    729 	int (*op_close)(int);
    730 
    731 	DPRINTF(("close %d\n", fd));
    732 	if (fd_isrump(fd)) {
    733 		fd = fd_host2rump(fd);
    734 		op_close = rumpcalls[RUMPCALL_CLOSE];
    735 	} else {
    736 		op_close = host_close;
    737 	}
    738 
    739 	return op_close(fd);
    740 }
    741 
/*
 * select wrapper, implemented on top of pollts() so that the host/rump
 * dispatch is done in one place.
 *
 * The three sets are converted to a pollfd vector holding one entry per
 * descriptor that is present in at least one set, pollts() is called,
 * and the results are converted back into the sets.
 *
 * Returns the value returned by pollts(), or -1 with errno set on
 * invalid nfds (EINVAL) or allocation failure.  On error the sets are
 * left unmodified; on timeout (0) they are cleared.
 *
 * NOTE(review): the return value counts descriptors with events, as
 * reported by pollts(), not the number of bits set in the sets.
 */
int
SELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t j, realnfds;
	int i, rv, sverrno;
	short events;

	DPRINTF(("select\n"));

	/* FD_ISSET() beyond the set, or a negative count, is invalid */
	if (nfds < 0 || nfds > FD_SETSIZE) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_open_fds.
	 */
	realnfds = 0;
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill in one entry per selected descriptor.  An entry is only
	 * touched once we know the descriptor is selected, so pfds is
	 * never written when it is NULL or already full.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		events = 0;
		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = pollts(pfds, realnfds, tsp, NULL);

	/* on error the descriptor sets must be left unmodified */
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  This also
	 * runs for a timeout, which must report empty sets.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	/* do not let free() disturb the errno reported by pollts() */
	sverrno = errno;
	free(pfds);
	errno = sverrno;
	return rv;
}
    850 
/*
 * Classify the descriptors of a poll vector.  For every entry whose fd
 * is not -1, increments *rumpcall if the descriptor belongs to the rump
 * kernel and *hostcall otherwise.  The counters are not reset here.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx = 0;
	int *counter;

	while (idx < nfds) {
		int fd = fds[idx++].fd;

		if (fd == -1)
			continue;

		counter = fd_isrump(fd) ? rumpcall : hostcall;
		(*counter)++;
	}
}
    866 
/*
 * Rewrite the descriptor numbers of a poll vector in place by applying
 * fdadj to each of them.
 *
 * Negative descriptors are poll's "ignore this entry" markers, not
 * descriptors, so they are left untouched.  Translating them would turn
 * the caller's -1 into a different negative number, which the caller
 * sees if the vector is not translated back afterwards.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	nfds_t i;

	for (i = 0; i < nfds; i++) {
		if (fds[i].fd < 0)
			continue;
		fds[i].fd = fdadj(fds[i].fd);
	}
}
    876 
    877 /*
    878  * poll is easy as long as the call comes in the fds only in one
    879  * kernel.  otherwise its quite tricky...
    880  */
    881 struct pollarg {
    882 	struct pollfd *pfds;
    883 	nfds_t nfds;
    884 	const struct timespec *ts;
    885 	const sigset_t *sigmask;
    886 	int pipefd;
    887 	int errnum;
    888 };
    889 
    890 static void *
    891 hostpoll(void *arg)
    892 {
    893 	struct pollarg *parg = arg;
    894 	intptr_t rv;
    895 
    896 	rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
    897 	if (rv == -1)
    898 		parg->errnum = errno;
    899 	rump_sys_write(parg->pipefd, &rv, sizeof(rv));
    900 
    901 	return (void *)(intptr_t)rv;
    902 }
    903 
    904 int
    905 POLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
    906 	const sigset_t *sigmask)
    907 {
    908 	int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
    909 			 const sigset_t *);
    910 	int hostcall = 0, rumpcall = 0;
    911 	pthread_t pt;
    912 	nfds_t i;
    913 	int rv;
    914 
    915 	DPRINTF(("poll\n"));
    916 	checkpoll(fds, nfds, &hostcall, &rumpcall);
    917 
    918 	if (hostcall && rumpcall) {
    919 		struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
    920 		int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
    921 		struct pollarg parg;
    922 		uintptr_t lrv;
    923 		int sverrno = 0, trv;
    924 
    925 		/*
    926 		 * ok, this is where it gets tricky.  We must support
    927 		 * this since it's a very common operation in certain
    928 		 * types of software (telnet, netcat, etc).  We allocate
    929 		 * two vectors and run two poll commands in separate
    930 		 * threads.  Whichever returns first "wins" and the
    931 		 * other kernel's fds won't show activity.
    932 		 */
    933 		rv = -1;
    934 
    935 		/* allocate full vector for O(n) joining after call */
    936 		pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
    937 		if (!pfd_host)
    938 			goto out;
    939 		pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
    940 		if (!pfd_rump) {
    941 			goto out;
    942 		}
    943 
    944 		/* split vectors */
    945 		for (i = 0; i < nfds; i++) {
    946 			if (fds[i].fd == -1) {
    947 				pfd_host[i].fd = -1;
    948 				pfd_rump[i].fd = -1;
    949 			} else if (fd_isrump(fds[i].fd)) {
    950 				pfd_host[i].fd = -1;
    951 				pfd_rump[i].fd = fd_host2rump(fds[i].fd);
    952 				pfd_rump[i].events = fds[i].events;
    953 			} else {
    954 				pfd_rump[i].fd = -1;
    955 				pfd_host[i].fd = fds[i].fd;
    956 				pfd_host[i].events = fds[i].events;
    957 			}
    958 			fds[i].revents = 0;
    959 		}
    960 
    961 		/*
    962 		 * then, open two pipes, one for notifications
    963 		 * to each kernel.
    964 		 */
    965 		if (rump_sys_pipe(rpipe) == -1)
    966 			goto out;
    967 		if (pipe(hpipe) == -1)
    968 			goto out;
    969 
    970 		pfd_host[nfds].fd = hpipe[0];
    971 		pfd_host[nfds].events = POLLIN;
    972 		pfd_rump[nfds].fd = rpipe[0];
    973 		pfd_rump[nfds].events = POLLIN;
    974 
    975 		/*
    976 		 * then, create a thread to do host part and meanwhile
    977 		 * do rump kernel part right here
    978 		 */
    979 
    980 		parg.pfds = pfd_host;
    981 		parg.nfds = nfds+1;
    982 		parg.ts = ts;
    983 		parg.sigmask = sigmask;
    984 		parg.pipefd = rpipe[1];
    985 		pthread_create(&pt, NULL, hostpoll, &parg);
    986 
    987 		op_pollts = rumpcalls[RUMPCALL_POLLTS];
    988 		lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
    989 		sverrno = errno;
    990 		write(hpipe[1], &rv, sizeof(rv));
    991 		pthread_join(pt, (void *)&trv);
    992 
    993 		/* check who "won" and merge results */
    994 		if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
    995 			rv = trv;
    996 
    997 			for (i = 0; i < nfds; i++) {
    998 				if (pfd_rump[i].fd != -1)
    999 					fds[i].revents = pfd_rump[i].revents;
   1000 			}
   1001 			sverrno = parg.errnum;
   1002 		} else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
   1003 			rv = trv;
   1004 
   1005 			for (i = 0; i < nfds; i++) {
   1006 				if (pfd_host[i].fd != -1)
   1007 					fds[i].revents = pfd_host[i].revents;
   1008 			}
   1009 		} else {
   1010 			rv = 0;
   1011 		}
   1012 
   1013  out:
   1014 		if (rpipe[0] != -1)
   1015 			rump_sys_close(rpipe[0]);
   1016 		if (rpipe[1] != -1)
   1017 			rump_sys_close(rpipe[1]);
   1018 		if (hpipe[0] != -1)
   1019 			host_close(hpipe[0]);
   1020 		if (hpipe[1] != -1)
   1021 			host_close(hpipe[1]);
   1022 		free(pfd_host);
   1023 		free(pfd_rump);
   1024 		errno = sverrno;
   1025 	} else {
   1026 		if (hostcall) {
   1027 			op_pollts = host_pollts;
   1028 		} else {
   1029 			op_pollts = rumpcalls[RUMPCALL_POLLTS];
   1030 			adjustpoll(fds, nfds, fd_host2rump);
   1031 		}
   1032 
   1033 		rv = op_pollts(fds, nfds, ts, sigmask);
   1034 		if (rumpcall)
   1035 			adjustpoll(fds, nfds, fd_rump2host);
   1036 	}
   1037 
   1038 	return rv;
   1039 }
   1040 
   1041 int
   1042 POLL(struct pollfd *fds, nfds_t nfds, int timeout)
   1043 {
   1044 	struct timespec ts;
   1045 	struct timespec *tsp = NULL;
   1046 
   1047 	if (timeout != INFTIM) {
   1048 		ts.tv_sec = timeout / 1000;
   1049 		ts.tv_nsec = (timeout % 1000) * 1000*1000;
   1050 
   1051 		tsp = &ts;
   1052 	}
   1053 
   1054 	return pollts(fds, nfds, tsp, NULL);
   1055 }
   1056 
   1057 #if 0
   1058 int
   1059 kqueue(void)
   1060 {
   1061 
   1062 	abort();
   1063 }
   1064 
   1065 int
   1066 kevent(int kq, const struct kevent *changelist, size_t nchanges,
   1067 	struct kevent *eventlist, size_t nevents,
   1068 	const struct timespec *timeout)
   1069 {
   1070 
   1071 	abort();
   1072 }
   1073 #endif
   1074