Home | History | Annotate | Line # | Download | only in librumphijack
hijack.c revision 1.15
      1 /*      $NetBSD: hijack.c,v 1.15 2011/01/18 23:43:21 pooka Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __RCSID("$NetBSD: hijack.c,v 1.15 2011/01/18 23:43:21 pooka Exp $");
     30 
     31 #include <sys/param.h>
     32 #include <sys/types.h>
     33 #include <sys/event.h>
     34 #include <sys/ioctl.h>
     35 #include <sys/socket.h>
     36 #include <sys/poll.h>
     37 
     38 #include <rump/rumpclient.h>
     39 #include <rump/rump_syscalls.h>
     40 
     41 #include <assert.h>
     42 #include <dlfcn.h>
     43 #include <err.h>
     44 #include <errno.h>
     45 #include <fcntl.h>
     46 #include <poll.h>
     47 #include <pthread.h>
     48 #include <signal.h>
     49 #include <stdarg.h>
     50 #include <stdbool.h>
     51 #include <stdio.h>
     52 #include <stdlib.h>
     53 #include <time.h>
     54 #include <unistd.h>
     55 
/*
 * Indices into rumpcalls[] and sysnames[].  The order here must match
 * the order of the entries in sysnames[].
 */
enum {
	RUMPCALL_SOCKET,
	RUMPCALL_ACCEPT,
	RUMPCALL_BIND,
	RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME,
	RUMPCALL_GETSOCKNAME,
	RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM,
	RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO,
	RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT,
	RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,
	RUMPCALL_READ,
	RUMPCALL_READV,
	RUMPCALL_WRITE,
	RUMPCALL_WRITEV,
	RUMPCALL_IOCTL,
	RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,
	RUMPCALL__NUM		/* number of entries, not a call */
};
     69 
     70 #define RSYS_STRING(a) __STRING(a)
     71 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
     72 
     73 const char *sysnames[] = {
     74 	RSYS_NAME(SOCKET),
     75 	RSYS_NAME(ACCEPT),
     76 	RSYS_NAME(BIND),
     77 	RSYS_NAME(CONNECT),
     78 	RSYS_NAME(GETPEERNAME),
     79 	RSYS_NAME(GETSOCKNAME),
     80 	RSYS_NAME(LISTEN),
     81 	RSYS_NAME(RECVFROM),
     82 	RSYS_NAME(RECVMSG),
     83 	RSYS_NAME(SENDTO),
     84 	RSYS_NAME(SENDMSG),
     85 	RSYS_NAME(GETSOCKOPT),
     86 	RSYS_NAME(SETSOCKOPT),
     87 	RSYS_NAME(SHUTDOWN),
     88 	RSYS_NAME(READ),
     89 	RSYS_NAME(READV),
     90 	RSYS_NAME(WRITE),
     91 	RSYS_NAME(WRITEV),
     92 	RSYS_NAME(IOCTL),
     93 	RSYS_NAME(FCNTL),
     94 	RSYS_NAME(CLOSE),
     95 	RSYS_NAME(POLLTS),
     96 };
     97 
/*
 * Host (non-rump) implementations of the interposed calls.
 * All of these are resolved with dlsym(RTLD_NEXT, ...) in rcinit().
 */

/* socket calls */
static int	(*host_socket)(int, int, int);
static int	(*host_bind)(int, const struct sockaddr *, socklen_t);
static int	(*host_connect)(int, const struct sockaddr *, socklen_t);
static int	(*host_listen)(int, int);
static int	(*host_accept)(int, struct sockaddr *, socklen_t *);
static int	(*host_getpeername)(int, struct sockaddr *, socklen_t *);
static int	(*host_getsockname)(int, struct sockaddr *, socklen_t *);
static int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
static int	(*host_shutdown)(int, int);

/* generic descriptor I/O */
static ssize_t	(*host_read)(int, void *, size_t);
static ssize_t	(*host_readv)(int, const struct iovec *, int);
static ssize_t	(*host_write)(int, const void *, size_t);
static ssize_t	(*host_writev)(int, const struct iovec *, int);
static int	(*host_ioctl)(int, unsigned long, ...);
static int	(*host_fcntl)(int, int, ...);
static int	(*host_close)(int);
static int	(*host_dup2)(int, int);
static int	(*host_pollts)(struct pollfd *, nfds_t,
			       const struct timespec *, const sigset_t *);

/* process management */
static pid_t	(*host_fork)(void);
    119 
    120 static void *rumpcalls[RUMPCALL__NUM];
    121 
    122 /*
    123  * Would be nice to get this automatically in sync with libc.
    124  * Also, this does not work for compat-using binaries!
    125  */
    126 
    127 #if !__NetBSD_Prereq__(5,99,7)
    128 #define SELECT select
    129 #define POLLTS pollts
    130 #define POLL poll
    131 #else
    132 #define SELECT __select50
    133 #define POLLTS __pollts50
    134 #define POLL __poll50
    135 
    136 int SELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
    137 int POLLTS(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *);
    138 int POLL(struct pollfd *, nfds_t, int);
    139 #endif
    140 
/*
 * This is called from librumpclient in case of LD_PRELOAD.
 * It ensures correct RTLD_NEXT.
 *
 * Simply forwards to dlsym() and returns whatever it returns.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *sym;

	sym = dlsym(handle, symbol);
	return sym;
}
    151 
/*
 * low calorie sockets?
 * When true, __socket30() creates AF_LOCAL sockets on the host instead
 * of in the rump kernel.
 */
static bool hostlocalsockets = true;
    154 
    155 static void __attribute__((constructor))
    156 rcinit(void)
    157 {
    158 	int (*rumpcinit)(void);
    159 	void **rumpcdlsym;
    160 	void *hand;
    161 	int i;
    162 
    163 	hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
    164 	if (!hand)
    165 		err(1, "cannot open librumpclient.so");
    166 	rumpcinit = dlsym(hand, "rumpclient_init");
    167 	_DIAGASSERT(rumpcinit);
    168 
    169 	rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
    170 	*rumpcdlsym = hijackdlsym;
    171 
    172 	host_socket = dlsym(RTLD_NEXT, "__socket30");
    173 	host_listen = dlsym(RTLD_NEXT, "listen");
    174 	host_connect = dlsym(RTLD_NEXT, "connect");
    175 	host_bind = dlsym(RTLD_NEXT, "bind");
    176 	host_accept = dlsym(RTLD_NEXT, "accept");
    177 	host_getpeername = dlsym(RTLD_NEXT, "getpeername");
    178 	host_getsockname = dlsym(RTLD_NEXT, "getsockname");
    179 	host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
    180 
    181 	host_read = dlsym(RTLD_NEXT, "read");
    182 	host_readv = dlsym(RTLD_NEXT, "readv");
    183 	host_write = dlsym(RTLD_NEXT, "write");
    184 	host_writev = dlsym(RTLD_NEXT, "writev");
    185 	host_ioctl = dlsym(RTLD_NEXT, "ioctl");
    186 	host_fcntl = dlsym(RTLD_NEXT, "fcntl");
    187 	host_close = dlsym(RTLD_NEXT, "close");
    188 	host_pollts = dlsym(RTLD_NEXT, "pollts");
    189 	host_fork = dlsym(RTLD_NEXT, "fork");
    190 	host_dup2 = dlsym(RTLD_NEXT, "dup2");
    191 	host_shutdown = dlsym(RTLD_NEXT, "shutdown");
    192 
    193 	for (i = 0; i < RUMPCALL__NUM; i++) {
    194 		rumpcalls[i] = dlsym(hand, sysnames[i]);
    195 		if (!rumpcalls[i]) {
    196 			fprintf(stderr, "rumphijack: cannot find symbol: %s\n",
    197 			    sysnames[i]);
    198 			exit(1);
    199 		}
    200 	}
    201 
    202 	if (rumpcinit() == -1)
    203 		err(1, "rumpclient init");
    204 }
    205 
/*
 * Bitmask of low-numbered descriptors onto which a rump kernel fd has
 * been dup2()'d; bit n set means descriptor n refers to the rump kernel.
 */
static unsigned dup2mask;

/*
 * True iff fd is recorded in dup2mask.  The range check keeps the
 * shift well-defined for negative fds and for fds beyond the mask
 * width; such fds are never in the mask.  Evaluates fd more than once.
 */
#define ISDUP2D(fd) \
	((fd) >= 0 && (fd) < 32 && ((1U << (fd)) & dup2mask) != 0)
    208 
/*
 * Debug tracing.  Define DEBUGJACK to have DPRINTF((fmt, ...)) print
 * to stderr; otherwise DPRINTF expands to nothing and its arguments
 * are not evaluated.
 */
//#define DEBUGJACK
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	/* stay quiet once stderr has been dup2'd onto a rump kernel fd */
	if (ISDUP2D(STDERR_FILENO))
		return;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}

#endif
    228 
/* XXX: need runtime selection.  low for now due to FD_SETSIZE */
#define HIJACK_FDOFF 128	/* offset added to rump kernel fds */
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */

/*
 * Translate a rump kernel descriptor into the value handed to the
 * application: -1 and dup2'd descriptors are returned unchanged,
 * everything else is shifted up by HIJACK_FDOFF.
 */
static int
fd_rump2host(int fd)
{

	if (fd != -1 && !ISDUP2D(fd))
		return fd + HIJACK_FDOFF;
	return fd;
}
    245 
    246 static int
    247 fd_host2rump(int fd)
    248 {
    249 
    250 	if (!ISDUP2D(fd))
    251 		fd -= HIJACK_FDOFF;
    252 	return fd;
    253 }
    254 
    255 static bool
    256 fd_isrump(int fd)
    257 {
    258 
    259 	return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
    260 }
    261 
/* assert that _fd_ is dup2'd or lies at/above HIJACK_ASSERT */
#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
/* the offset is not to be used directly past this point */
#undef HIJACK_FDOFF
    264 
    265 int __socket30(int, int, int);
    266 int
    267 __socket30(int domain, int type, int protocol)
    268 {
    269 	int (*rc_socket)(int, int, int);
    270 	int fd;
    271 	bool dohost;
    272 
    273 	dohost = hostlocalsockets && (domain == AF_LOCAL);
    274 
    275 	if (dohost)
    276 		rc_socket = host_socket;
    277 	else
    278 		rc_socket = rumpcalls[RUMPCALL_SOCKET];
    279 	fd = rc_socket(domain, type, protocol);
    280 
    281 	if (!dohost)
    282 		fd = fd_rump2host(fd);
    283 	DPRINTF(("socket <- %d\n", fd));
    284 
    285 	return fd;
    286 }
    287 
    288 int
    289 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
    290 {
    291 	int (*rc_accept)(int, struct sockaddr *, socklen_t *);
    292 	int fd;
    293 	bool isrump;
    294 
    295 	isrump = fd_isrump(s);
    296 
    297 	DPRINTF(("accept -> %d", s));
    298 	if (isrump) {
    299 		rc_accept = rumpcalls[RUMPCALL_ACCEPT];
    300 		s = fd_host2rump(s);
    301 	} else {
    302 		rc_accept = host_accept;
    303 	}
    304 	fd = rc_accept(s, addr, addrlen);
    305 	if (fd != -1 && isrump)
    306 		fd = fd_rump2host(fd);
    307 
    308 	DPRINTF((" <- %d\n", fd));
    309 
    310 	return fd;
    311 }
    312 
    313 int
    314 bind(int s, const struct sockaddr *name, socklen_t namelen)
    315 {
    316 	int (*rc_bind)(int, const struct sockaddr *, socklen_t);
    317 
    318 	DPRINTF(("bind -> %d\n", s));
    319 	if (fd_isrump(s)) {
    320 		rc_bind = rumpcalls[RUMPCALL_BIND];
    321 		s = fd_host2rump(s);
    322 	} else {
    323 		rc_bind = host_bind;
    324 	}
    325 	return rc_bind(s, name, namelen);
    326 }
    327 
    328 int
    329 connect(int s, const struct sockaddr *name, socklen_t namelen)
    330 {
    331 	int (*rc_connect)(int, const struct sockaddr *, socklen_t);
    332 
    333 	DPRINTF(("connect -> %d\n", s));
    334 	if (fd_isrump(s)) {
    335 		rc_connect = rumpcalls[RUMPCALL_CONNECT];
    336 		s = fd_host2rump(s);
    337 	} else {
    338 		rc_connect = host_connect;
    339 	}
    340 
    341 	return rc_connect(s, name, namelen);
    342 }
    343 
    344 int
    345 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
    346 {
    347 	int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
    348 
    349 	DPRINTF(("getpeername -> %d\n", s));
    350 	if (fd_isrump(s)) {
    351 		rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
    352 		s = fd_host2rump(s);
    353 	} else {
    354 		rc_getpeername = host_getpeername;
    355 	}
    356 	return rc_getpeername(s, name, namelen);
    357 }
    358 
    359 int
    360 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
    361 {
    362 	int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
    363 
    364 	DPRINTF(("getsockname -> %d\n", s));
    365 	if (fd_isrump(s)) {
    366 		rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
    367 		s = fd_host2rump(s);
    368 	} else {
    369 		rc_getsockname = host_getsockname;
    370 	}
    371 	return rc_getsockname(s, name, namelen);
    372 }
    373 
    374 int
    375 listen(int s, int backlog)
    376 {
    377 	int (*rc_listen)(int, int);
    378 
    379 	DPRINTF(("listen -> %d\n", s));
    380 	if (fd_isrump(s)) {
    381 		rc_listen = rumpcalls[RUMPCALL_LISTEN];
    382 		s = fd_host2rump(s);
    383 	} else {
    384 		rc_listen = host_listen;
    385 	}
    386 	return rc_listen(s, backlog);
    387 }
    388 
/* recv(2) is recvfrom(2) without a source address. */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{
	struct sockaddr *nofrom = NULL;
	socklen_t *nofromlen = NULL;

	return recvfrom(s, buf, len, flags, nofrom, nofromlen);
}
    395 
    396 ssize_t
    397 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
    398 	socklen_t *fromlen)
    399 {
    400 	int (*rc_recvfrom)(int, void *, size_t, int,
    401 	    struct sockaddr *, socklen_t *);
    402 
    403 	DPRINTF(("recvfrom\n"));
    404 	assertfd(s);
    405 	rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
    406 	return rc_recvfrom(fd_host2rump(s), buf, len, flags, from, fromlen);
    407 }
    408 
    409 ssize_t
    410 recvmsg(int s, struct msghdr *msg, int flags)
    411 {
    412 	int (*rc_recvmsg)(int, struct msghdr *, int);
    413 
    414 	DPRINTF(("recvmsg\n"));
    415 	assertfd(s);
    416 	rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
    417 	return rc_recvmsg(fd_host2rump(s), msg, flags);
    418 }
    419 
/* send(2) is sendto(2) without a destination address. */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{
	const struct sockaddr *noto = NULL;

	return sendto(s, buf, len, flags, noto, 0);
}
    426 
    427 ssize_t
    428 sendto(int s, const void *buf, size_t len, int flags,
    429 	const struct sockaddr *to, socklen_t tolen)
    430 {
    431 	int (*rc_sendto)(int, const void *, size_t, int,
    432 	    const struct sockaddr *, socklen_t);
    433 
    434 	if (s == -1)
    435 		return len;
    436 
    437 	DPRINTF(("sendto\n"));
    438 	assertfd(s);
    439 	rc_sendto = rumpcalls[RUMPCALL_SENDTO];
    440 	return rc_sendto(fd_host2rump(s), buf, len, flags, to, tolen);
    441 }
    442 
    443 ssize_t
    444 sendmsg(int s, const struct msghdr *msg, int flags)
    445 {
    446 	int (*rc_sendmsg)(int, const struct msghdr *, int);
    447 
    448 	DPRINTF(("sendmsg\n"));
    449 	assertfd(s);
    450 	rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
    451 	return rc_sendmsg(fd_host2rump(s), msg, flags);
    452 }
    453 
    454 int
    455 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
    456 {
    457 	int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
    458 
    459 	DPRINTF(("getsockopt -> %d\n", s));
    460 	assertfd(s);
    461 	rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
    462 	return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
    463 }
    464 
    465 int
    466 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
    467 {
    468 	int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
    469 
    470 	DPRINTF(("setsockopt -> %d\n", s));
    471 	if (fd_isrump(s)) {
    472 		rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
    473 		s = fd_host2rump(s);
    474 	} else {
    475 		rc_setsockopt = host_setsockopt;
    476 	}
    477 	return rc_setsockopt(s, level, optname, optval, optlen);
    478 }
    479 
    480 int
    481 shutdown(int s, int how)
    482 {
    483 	int (*rc_shutdown)(int, int);
    484 
    485 	DPRINTF(("shutdown -> %d\n", s));
    486 	if (fd_isrump(s)) {
    487 		rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
    488 		s = fd_host2rump(s);
    489 	} else {
    490 		rc_shutdown = host_shutdown;
    491 	}
    492 	return rc_shutdown(s, how);
    493 }
    494 
    495 /*
    496  * dup2 is special.  we allow dup2 of a rump kernel fd to 0-2 since
    497  * many programs do that.  dup2 of a rump kernel fd to another value
    498  * not >= fdoff is an error.
    499  *
    500  * Note: cannot rump2host newd, because it is often hardcoded.
    501  *
    502  * XXX: should disable debug prints after stdout/stderr are dup2'd
    503  */
    504 int
    505 dup2(int oldd, int newd)
    506 {
    507 	int rv;
    508 
    509 	DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
    510 
    511 	if (fd_isrump(oldd)) {
    512 		if (!(newd >= 0 && newd <= 2))
    513 			return EBADF;
    514 		oldd = fd_host2rump(oldd);
    515 		rv = rump_sys_dup2(oldd, newd);
    516 		if (rv != -1)
    517 			dup2mask |= 1<<newd;
    518 	} else {
    519 		rv = host_dup2(oldd, newd);
    520 	}
    521 
    522 	return rv;
    523 }
    524 
/*
 * We just wrap fork in the appropriate rump client calls to preserve
 * the file descriptors of the forked parent in the child, but
 * prevent double use of the connection fd.
 *
 * Returns -1 if rumpclient_prefork() fails, or in the child if
 * rumpclient_fork_init() fails; otherwise returns what the host
 * fork returned.
 */

pid_t
fork()
{
	struct rumpclient_fork *forkstate;
	pid_t pid;

	DPRINTF(("fork\n"));

	forkstate = rumpclient_prefork();
	if (forkstate == NULL)
		return -1;

	pid = host_fork();
	/* XXX: forkstate is not cancelled if the host fork failed */
	if (pid == 0 && rumpclient_fork_init(forkstate) == -1)
		pid = -1;

	DPRINTF(("fork returns %d\n", pid));
	return pid;
}
    557 
    558 /*
    559  * Hybrids
    560  */
    561 
    562 ssize_t
    563 read(int fd, void *buf, size_t len)
    564 {
    565 	ssize_t (*op_read)(int, void *, size_t);
    566 	ssize_t n;
    567 
    568 	DPRINTF(("read %d\n", fd));
    569 	if (fd_isrump(fd)) {
    570 		fd = fd_host2rump(fd);
    571 		op_read = rumpcalls[RUMPCALL_READ];
    572 	} else {
    573 		op_read = host_read;
    574 	}
    575 
    576 	n = op_read(fd, buf, len);
    577 	return n;
    578 }
    579 
    580 ssize_t
    581 readv(int fd, const struct iovec *iov, int iovcnt)
    582 {
    583 	ssize_t (*op_readv)(int, const struct iovec *, int);
    584 
    585 	DPRINTF(("readv %d\n", fd));
    586 	if (fd_isrump(fd)) {
    587 		fd = fd_host2rump(fd);
    588 		op_readv = rumpcalls[RUMPCALL_READV];
    589 	} else {
    590 		op_readv = host_readv;
    591 	}
    592 
    593 	return op_readv(fd, iov, iovcnt);
    594 }
    595 
    596 ssize_t
    597 write(int fd, const void *buf, size_t len)
    598 {
    599 	ssize_t (*op_write)(int, const void *, size_t);
    600 
    601 	if (fd_isrump(fd)) {
    602 		fd = fd_host2rump(fd);
    603 		op_write = rumpcalls[RUMPCALL_WRITE];
    604 	} else {
    605 		op_write = host_write;
    606 	}
    607 
    608 	return op_write(fd, buf, len);
    609 }
    610 
    611 ssize_t
    612 writev(int fd, const struct iovec *iov, int iovcnt)
    613 {
    614 	ssize_t (*op_writev)(int, const struct iovec *, int);
    615 
    616 	DPRINTF(("writev %d\n", fd));
    617 	if (fd_isrump(fd)) {
    618 		fd = fd_host2rump(fd);
    619 		op_writev = rumpcalls[RUMPCALL_WRITEV];
    620 	} else {
    621 		op_writev = host_writev;
    622 	}
    623 
    624 	return op_writev(fd, iov, iovcnt);
    625 }
    626 
    627 int
    628 ioctl(int fd, unsigned long cmd, ...)
    629 {
    630 	int (*op_ioctl)(int, unsigned long cmd, ...);
    631 	va_list ap;
    632 	int rv;
    633 
    634 	DPRINTF(("ioctl\n"));
    635 	if (fd_isrump(fd)) {
    636 		fd = fd_host2rump(fd);
    637 		op_ioctl = rumpcalls[RUMPCALL_IOCTL];
    638 	} else {
    639 		op_ioctl = host_ioctl;
    640 	}
    641 
    642 	va_start(ap, cmd);
    643 	rv = op_ioctl(fd, cmd, va_arg(ap, void *));
    644 	va_end(ap);
    645 	return rv;
    646 }
    647 
    648 int
    649 fcntl(int fd, int cmd, ...)
    650 {
    651 	int (*op_fcntl)(int, int, ...);
    652 	va_list ap;
    653 	int rv;
    654 
    655 	DPRINTF(("fcntl\n"));
    656 	if (fd_isrump(fd)) {
    657 		fd = fd_host2rump(fd);
    658 		op_fcntl = rumpcalls[RUMPCALL_FCNTL];
    659 	} else {
    660 		op_fcntl = host_fcntl;
    661 	}
    662 
    663 	va_start(ap, cmd);
    664 	rv = op_fcntl(fd, cmd, va_arg(ap, void *));
    665 	va_end(ap);
    666 	return rv;
    667 }
    668 
    669 int
    670 close(int fd)
    671 {
    672 	int (*op_close)(int);
    673 
    674 	DPRINTF(("close %d\n", fd));
    675 	if (fd_isrump(fd)) {
    676 		fd = fd_host2rump(fd);
    677 		op_close = rumpcalls[RUMPCALL_CLOSE];
    678 	} else {
    679 		op_close = host_close;
    680 	}
    681 
    682 	return op_close(fd);
    683 }
    684 
/*
 * select(2) interposer, implemented on top of pollts().
 *
 * Every descriptor below nfds that is a member of at least one set
 * gets one pollfd entry: readfds maps to POLLIN, writefds to POLLOUT
 * and exceptfds to POLLHUP|POLLERR.  On success (including timeout)
 * the sets are rewritten to contain only the descriptors for which
 * the corresponding event was reported.
 *
 * Returns whatever pollts() returned, i.e. the number of descriptors
 * with events rather than the number of set bits.  Returns -1 with
 * errno set to EINVAL for negative nfds, and -1 if the pollfd vector
 * cannot be allocated.  On error the sets are left untouched.
 */
int
SELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t realnfds = 0, j;
	int i, rv;

	DPRINTF(("select\n"));

	if (nfds < 0) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_open_fds.
	 * Seems to be fixed in current, thank the maker.
	 * god damn cluster...bomb.
	 */
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill the vector.  A slot is touched only once we know fd i is
	 * wanted, so we never write through a NULL vector or past the
	 * last of the realnfds slots.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		short events = 0;

		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = pollts(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  whee.
	 * This is done on timeout too, so that the caller sees empty sets.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	free(pfds);
	return rv;
}
    793 
/*
 * Count how many entries of fds refer to the host and how many to the
 * rump kernel, adding to *hostcall and *rumpcall respectively.
 * Entries with fd == -1 are not counted.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	struct pollfd *pfd, *end = fds + nfds;

	for (pfd = fds; pfd < end; pfd++) {
		if (pfd->fd == -1)
			continue;

		*(fd_isrump(pfd->fd) ? rumpcall : hostcall) += 1;
	}
}
    809 
    810 static void
    811 adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
    812 {
    813 	nfds_t i;
    814 
    815 	for (i = 0; i < nfds; i++) {
    816 		fds[i].fd = fdadj(fds[i].fd);
    817 	}
    818 }
    819 
/*
 * poll is easy as long as the call comes in with fds in only one
 * kernel.  otherwise it's quite tricky...
 *
 * struct pollarg carries the arguments for the host-side pollts()
 * that hostpoll() runs in its own thread.
 */
struct pollarg {
	struct pollfd *pfds;		/* host pollfd vector */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout, passed through */
	const sigset_t *sigmask;	/* signal mask, passed through */
	int pipefd;			/* rump kernel fd written when done */
	int errnum;			/* errno of a failed host pollts() */
};
    832 
    833 static void *
    834 hostpoll(void *arg)
    835 {
    836 	struct pollarg *parg = arg;
    837 	intptr_t rv;
    838 
    839 	rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
    840 	if (rv == -1)
    841 		parg->errnum = errno;
    842 	rump_sys_write(parg->pipefd, &rv, sizeof(rv));
    843 
    844 	return (void *)(intptr_t)rv;
    845 }
    846 
    847 int
    848 POLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
    849 	const sigset_t *sigmask)
    850 {
    851 	int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
    852 			 const sigset_t *);
    853 	int hostcall = 0, rumpcall = 0;
    854 	pthread_t pt;
    855 	nfds_t i;
    856 	int rv;
    857 
    858 	DPRINTF(("poll\n"));
    859 	checkpoll(fds, nfds, &hostcall, &rumpcall);
    860 
    861 	if (hostcall && rumpcall) {
    862 		struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
    863 		int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
    864 		struct pollarg parg;
    865 		uintptr_t lrv;
    866 		int sverrno = 0, trv;
    867 
    868 		/*
    869 		 * ok, this is where it gets tricky.  We must support
    870 		 * this since it's a very common operation in certain
    871 		 * types of software (telnet, netcat, etc).  We allocate
    872 		 * two vectors and run two poll commands in separate
    873 		 * threads.  Whichever returns first "wins" and the
    874 		 * other kernel's fds won't show activity.
    875 		 */
    876 		rv = -1;
    877 
    878 		/* allocate full vector for O(n) joining after call */
    879 		pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
    880 		if (!pfd_host)
    881 			goto out;
    882 		pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
    883 		if (!pfd_rump) {
    884 			goto out;
    885 		}
    886 
    887 		/* split vectors */
    888 		for (i = 0; i < nfds; i++) {
    889 			if (fds[i].fd == -1) {
    890 				pfd_host[i].fd = -1;
    891 				pfd_rump[i].fd = -1;
    892 			} else if (fd_isrump(fds[i].fd)) {
    893 				pfd_host[i].fd = -1;
    894 				pfd_rump[i].fd = fd_host2rump(fds[i].fd);
    895 				pfd_rump[i].events = fds[i].events;
    896 			} else {
    897 				pfd_rump[i].fd = -1;
    898 				pfd_host[i].fd = fds[i].fd;
    899 				pfd_host[i].events = fds[i].events;
    900 			}
    901 			fds[i].revents = 0;
    902 		}
    903 
    904 		/*
    905 		 * then, open two pipes, one for notifications
    906 		 * to each kernel.
    907 		 */
    908 		if (rump_sys_pipe(rpipe) == -1)
    909 			goto out;
    910 		if (pipe(hpipe) == -1)
    911 			goto out;
    912 
    913 		pfd_host[nfds].fd = hpipe[0];
    914 		pfd_host[nfds].events = POLLIN;
    915 		pfd_rump[nfds].fd = rpipe[0];
    916 		pfd_rump[nfds].events = POLLIN;
    917 
    918 		/*
    919 		 * then, create a thread to do host part and meanwhile
    920 		 * do rump kernel part right here
    921 		 */
    922 
    923 		parg.pfds = pfd_host;
    924 		parg.nfds = nfds+1;
    925 		parg.ts = ts;
    926 		parg.sigmask = sigmask;
    927 		parg.pipefd = rpipe[1];
    928 		pthread_create(&pt, NULL, hostpoll, &parg);
    929 
    930 		op_pollts = rumpcalls[RUMPCALL_POLLTS];
    931 		lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
    932 		sverrno = errno;
    933 		write(hpipe[1], &rv, sizeof(rv));
    934 		pthread_join(pt, (void *)&trv);
    935 
    936 		/* check who "won" and merge results */
    937 		if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
    938 			rv = trv;
    939 
    940 			for (i = 0; i < nfds; i++) {
    941 				if (pfd_rump[i].fd != -1)
    942 					fds[i].revents = pfd_rump[i].revents;
    943 			}
    944 			sverrno = parg.errnum;
    945 		} else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
    946 			rv = trv;
    947 
    948 			for (i = 0; i < nfds; i++) {
    949 				if (pfd_host[i].fd != -1)
    950 					fds[i].revents = pfd_host[i].revents;
    951 			}
    952 		} else {
    953 			rv = 0;
    954 		}
    955 
    956  out:
    957 		if (rpipe[0] != -1)
    958 			rump_sys_close(rpipe[0]);
    959 		if (rpipe[1] != -1)
    960 			rump_sys_close(rpipe[1]);
    961 		if (hpipe[0] != -1)
    962 			host_close(hpipe[0]);
    963 		if (hpipe[1] != -1)
    964 			host_close(hpipe[1]);
    965 		free(pfd_host);
    966 		free(pfd_rump);
    967 		errno = sverrno;
    968 	} else {
    969 		if (hostcall) {
    970 			op_pollts = host_pollts;
    971 		} else {
    972 			op_pollts = rumpcalls[RUMPCALL_POLLTS];
    973 			adjustpoll(fds, nfds, fd_host2rump);
    974 		}
    975 
    976 		rv = op_pollts(fds, nfds, ts, sigmask);
    977 		if (rumpcall)
    978 			adjustpoll(fds, nfds, fd_rump2host);
    979 	}
    980 
    981 	return rv;
    982 }
    983 
    984 int
    985 POLL(struct pollfd *fds, nfds_t nfds, int timeout)
    986 {
    987 	struct timespec ts;
    988 	struct timespec *tsp = NULL;
    989 
    990 	if (timeout != INFTIM) {
    991 		ts.tv_sec = timeout / 1000;
    992 		ts.tv_nsec = (timeout % 1000) * 1000*1000;
    993 
    994 		tsp = &ts;
    995 	}
    996 
    997 	return pollts(fds, nfds, tsp, NULL);
    998 }
    999 
/* kqueue(2) is not supported by the hijack library: abort. */
int
kqueue(void)
{

	abort();
	/* NOTREACHED */
}
   1006 
/* kevent(2) is not supported by the hijack library: abort. */
int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{

	abort();
	/* NOTREACHED */
}
   1015