/*	$NetBSD: hijack.c,v 1.33 2011/02/08 12:20:11 pooka Exp $	*/
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.33 2011/02/08 12:20:11 pooka Exp $");
30
31 #define __ssp_weak_name(fun) _hijack_ ## fun
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/event.h>
36 #include <sys/ioctl.h>
37 #include <sys/socket.h>
38 #include <sys/poll.h>
39
40 #include <rump/rumpclient.h>
41 #include <rump/rump_syscalls.h>
42
43 #include <assert.h>
44 #include <dlfcn.h>
45 #include <err.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <poll.h>
49 #include <pthread.h>
50 #include <signal.h>
51 #include <stdarg.h>
52 #include <stdbool.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <time.h>
57 #include <unistd.h>
58
/*
 * One identifier per system call that is looked up both on the host
 * and in the rump kernel.  The values are used as indices into the
 * syscalls[] table, and DUALCALL__NUM is the number of entries.
 */
enum dualcall {
	DUALCALL_WRITE,
	DUALCALL_WRITEV,
	DUALCALL_IOCTL,
	DUALCALL_FCNTL,
	DUALCALL_SOCKET,
	DUALCALL_ACCEPT,
	DUALCALL_BIND,
	DUALCALL_CONNECT,
	DUALCALL_GETPEERNAME,
	DUALCALL_GETSOCKNAME,
	DUALCALL_LISTEN,
	DUALCALL_RECVFROM,
	DUALCALL_RECVMSG,
	DUALCALL_SENDTO,
	DUALCALL_SENDMSG,
	DUALCALL_GETSOCKOPT,
	DUALCALL_SETSOCKOPT,
	DUALCALL_SHUTDOWN,
	DUALCALL_READ,
	DUALCALL_READV,
	DUALCALL_DUP2,
	DUALCALL_CLOSE,
	DUALCALL_POLLTS,
	DUALCALL__NUM
};
73
/*
 * RSYS_NAME(CALL) yields, as a string literal, whatever the macro
 * RUMP_SYS_RENAME_CALL expands to.  The extra RSYS_STRING level makes
 * sure the pasted name is macro-expanded before it is stringified.
 */
#define RSYS_STRING(sym) __STRING(sym)
#define RSYS_NAME(call) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,call))
76
77 /*
78 * Would be nice to get this automatically in sync with libc.
79 * Also, this does not work for compat-using binaries!
80 */
81 #if !__NetBSD_Prereq__(5,99,7)
82 #define REALSELECT select
83 #define REALPOLLTS pollts
84 #else
85 #define REALSELECT _sys___select50
86 #define REALPOLLTS _sys___pollts50
87 #endif
88 #define REALREAD _sys_read
89
90 int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
91 int REALPOLLTS(struct pollfd *, nfds_t,
92 const struct timespec *, const sigset_t *);
93 ssize_t REALREAD(int, void *, size_t);
94
95 #define S(a) __STRING(a)
96 struct sysnames {
97 enum dualcall scm_callnum;
98 const char *scm_hostname;
99 const char *scm_rumpname;
100 } syscnames[] = {
101 { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) },
102 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) },
103 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) },
104 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) },
105 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) },
106 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) },
107 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) },
108 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) },
109 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) },
110 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) },
111 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) },
112 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) },
113 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) },
114 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) },
115 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) },
116 { DUALCALL_READV, "readv", RSYS_NAME(READV) },
117 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) },
118 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) },
119 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) },
120 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) },
121 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) },
122 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) },
123 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) },
124 };
125 #undef S
126
/*
 * Resolved entry points for each dual call, indexed by enum dualcall.
 * Both members are filled in by rcinit().
 */
struct bothsys {
	void *bs_host;		/* host implementation */
	void *bs_rump;		/* rump kernel implementation */
} syscalls[DUALCALL__NUM];

/* GETSYSCALL(host, READ) -> syscalls[DUALCALL_READ].bs_host */
#define GETSYSCALL(kernel, call) syscalls[DUALCALL_##call].bs_##kernel
132
/* Host fork() and daemon(), resolved via RTLD_NEXT in rcinit(). */
pid_t (*host_fork)(void);
int (*host_daemon)(int, int);

/*
 * Bit n is set when descriptor n was the target of a successful dup2()
 * from a rump kernel descriptor (see dup2()).
 */
static unsigned dup2mask;

/*
 * True iff fd is marked in dup2mask.  Descriptors that cannot be
 * represented in the mask (negative, or >= the number of mask bits)
 * are never marked; testing them must not shift out of range, which
 * is undefined behaviour and on some CPUs aliases e.g. fd 129 to bit 1.
 */
#define ISDUP2D(fd)							\
	((fd) >= 0 && (fd) < (int)(sizeof(dup2mask) * 8) &&		\
	    ((1U << (fd)) & dup2mask) != 0)
138
/*
 * Debug tracing.  Call as DPRINTF(("fmt", args...)) with double
 * parentheses.  Unless DEBUGJACK is defined the macro expands to
 * nothing and its arguments are never evaluated.
 */
//#define DEBUGJACK
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	/* print only while stderr is not marked as dup2()'d */
	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#endif
158
/*
 * Define a wrapper "rettype fname arglist" whose first parameter must
 * be named fd.  If fd belongs to the rump kernel, the descriptor is
 * translated and the call goes to syscalls[callnum].bs_rump; otherwise
 * it goes unchanged to syscalls[callnum].bs_host.  "proto" is the
 * parameter type list of the target and "callargs" the argument list
 * forwarded to it.
 */
#define FDCALL(rettype, fname, callnum, arglist, proto, callargs)	\
rettype fname arglist							\
{									\
	struct bothsys *bs = &syscalls[callnum];			\
	rettype (*op) proto = bs->bs_host;				\
									\
	DPRINTF(("%s -> %d\n", __STRING(fname), fd));			\
	if (fd_isrump(fd)) {						\
		op = bs->bs_rump;					\
		fd = fd_host2rump(fd);					\
	}								\
									\
	return op callargs;						\
}
174
/*
 * This is called from librumpclient in case of LD_PRELOAD.
 * It ensures correct RTLD_NEXT.
 *
 * ... except, it's apparently extremely difficult to force
 * at least gcc to generate an actual stack frame here.  So
 * sprinkle some volatile foobar and baz to throw the optimizer
 * off the scent and generate a variable assignment with the
 * return value.  The posterboy for this meltdown is amd64
 * with -O2.  At least with gcc 4.1.3 i386 works regardless of
 * optimization.
 *
 * Returns whatever dlsym() returns.  A failed lookup (NULL) is passed
 * through untouched: the volatile read is only performed on a
 * non-NULL result, so a missing symbol no longer crashes here.  The
 * result is still consumed after the call, which keeps dlsym() from
 * becoming a tail call.
 */
volatile int rumphijack_unrope; /* there, unhang yourself */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *rv;

	rv = dlsym(handle, symbol);
	if (rv != NULL)
		rumphijack_unrope = *(volatile int *)rv;

	return rv;
}
198
199 /* low calorie sockets? */
200 static bool hostlocalsockets = true;
201
202 static void __attribute__((constructor))
203 rcinit(void)
204 {
205 char buf[64];
206 extern void *(*rumpclient_dlsym)(void *, const char *);
207 unsigned i, j;
208
209 rumpclient_dlsym = hijackdlsym;
210 host_fork = dlsym(RTLD_NEXT, "fork");
211 host_daemon = dlsym(RTLD_NEXT, "daemon");
212
213 /*
214 * In theory cannot print anything during lookups because
215 * we might not have the call vector set up. so, the errx()
216 * is a bit of a strech, but it might work.
217 */
218
219 for (i = 0; i < DUALCALL__NUM; i++) {
220 /* build runtime O(1) access */
221 for (j = 0; j < __arraycount(syscnames); j++) {
222 if (syscnames[j].scm_callnum == i)
223 break;
224 }
225
226 if (j == __arraycount(syscnames))
227 errx(1, "rumphijack error: syscall pos %d missing", i);
228
229 syscalls[i].bs_host = dlsym(RTLD_NEXT,
230 syscnames[j].scm_hostname);
231 if (syscalls[i].bs_host == NULL)
232 errx(1, "hostcall %s not found missing",
233 syscnames[j].scm_hostname);
234
235 syscalls[i].bs_rump = dlsym(RTLD_NEXT,
236 syscnames[j].scm_rumpname);
237 if (syscalls[i].bs_rump == NULL)
238 errx(1, "rumpcall %s not found missing",
239 syscnames[j].scm_rumpname);
240 }
241
242 if (rumpclient_init() == -1)
243 err(1, "rumpclient init");
244
245 /* set client persistence level */
246 if (getenv_r("RUMPHIJACK_RETRY", buf, sizeof(buf)) == -1) {
247 if (errno == ERANGE)
248 err(1, "invalid RUMPHIJACK_RETRY");
249 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
250 } else {
251 if (strcmp(buf, "die") == 0)
252 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE);
253 else if (strcmp(buf, "inftime") == 0)
254 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
255 else if (strcmp(buf, "once") == 0)
256 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE);
257 else {
258 time_t timeout;
259
260 timeout = (time_t)strtoll(buf, NULL, 10);
261 if (timeout <= 0)
262 errx(1, "RUMPHIJACK_RETRY must be keyword "
263 "or a positive integer, got: %s", buf);
264
265 rumpclient_setconnretry(timeout);
266 }
267 }
268 }
269
/* XXX: need runtime selection.  low for now due to FD_SETSIZE */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */

/*
 * Convert a descriptor number returned by the rump kernel into the
 * number handed to the application: HIJACK_FDOFF is added unless the
 * descriptor is marked as dup2()'d.  The error value -1 is passed
 * through unchanged.
 */
static int
fd_rump2host(int fd)
{

	if (fd != -1 && !ISDUP2D(fd))
		return fd + HIJACK_FDOFF;

	return fd;
}
286
287 static int
288 fd_host2rump(int fd)
289 {
290
291 if (!ISDUP2D(fd))
292 fd -= HIJACK_FDOFF;
293 return fd;
294 }
295
296 static bool
297 fd_isrump(int fd)
298 {
299
300 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
301 }
302
303 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
304 #undef HIJACK_FDOFF
305
306 int __socket30(int, int, int);
307 int
308 __socket30(int domain, int type, int protocol)
309 {
310 int (*op_socket)(int, int, int);
311 int fd;
312 bool dohost;
313
314 dohost = hostlocalsockets && (domain == AF_LOCAL);
315
316 if (dohost)
317 op_socket = GETSYSCALL(host, SOCKET);
318 else
319 op_socket = GETSYSCALL(rump, SOCKET);
320 fd = op_socket(domain, type, protocol);
321
322 if (!dohost)
323 fd = fd_rump2host(fd);
324 DPRINTF(("socket <- %d\n", fd));
325
326 return fd;
327 }
328
329 int
330 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
331 {
332 int (*op_accept)(int, struct sockaddr *, socklen_t *);
333 int fd;
334 bool isrump;
335
336 isrump = fd_isrump(s);
337
338 DPRINTF(("accept -> %d", s));
339 if (isrump) {
340 op_accept = GETSYSCALL(rump, ACCEPT);
341 s = fd_host2rump(s);
342 } else {
343 op_accept = GETSYSCALL(host, ACCEPT);
344 }
345 fd = op_accept(s, addr, addrlen);
346 if (fd != -1 && isrump)
347 fd = fd_rump2host(fd);
348
349 DPRINTF((" <- %d\n", fd));
350
351 return fd;
352 }
353
354 /*
355 * ioctl and fcntl are varargs calls and need special treatment
356 */
357 int
358 ioctl(int fd, unsigned long cmd, ...)
359 {
360 int (*op_ioctl)(int, unsigned long cmd, ...);
361 va_list ap;
362 int rv;
363
364 DPRINTF(("ioctl -> %d\n", fd));
365 if (fd_isrump(fd)) {
366 fd = fd_host2rump(fd);
367 op_ioctl = GETSYSCALL(rump, IOCTL);
368 } else {
369 op_ioctl = GETSYSCALL(host, IOCTL);
370 }
371
372 va_start(ap, cmd);
373 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
374 va_end(ap);
375 return rv;
376 }
377
378 int
379 fcntl(int fd, int cmd, ...)
380 {
381 int (*op_fcntl)(int, int, ...);
382 va_list ap;
383 int rv;
384
385 DPRINTF(("fcntl -> %d\n", fd));
386 if (fd_isrump(fd)) {
387 fd = fd_host2rump(fd);
388 op_fcntl = GETSYSCALL(rump, FCNTL);
389 } else {
390 op_fcntl = GETSYSCALL(host, FCNTL);
391 }
392
393 va_start(ap, cmd);
394 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
395 va_end(ap);
396 return rv;
397 }
398
399 /*
400 * write cannot issue a standard debug printf due to recursion
401 */
402 ssize_t
403 write(int fd, const void *buf, size_t blen)
404 {
405 ssize_t (*op_write)(int, const void *, size_t);
406
407 if (fd_isrump(fd)) {
408 fd = fd_host2rump(fd);
409 op_write = GETSYSCALL(rump, WRITE);
410 } else {
411 op_write = GETSYSCALL(host, WRITE);
412 }
413
414 return op_write(fd, buf, blen);
415 }
416
417 /*
418 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
419 * many programs do that. dup2 of a rump kernel fd to another value
420 * not >= fdoff is an error.
421 *
422 * Note: cannot rump2host newd, because it is often hardcoded.
423 */
424 int
425 dup2(int oldd, int newd)
426 {
427 int (*host_dup2)(int, int);
428 int rv;
429
430 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
431
432 if (fd_isrump(oldd)) {
433 if (!(newd >= 0 && newd <= 2))
434 return EBADF;
435 oldd = fd_host2rump(oldd);
436 rv = rump_sys_dup2(oldd, newd);
437 if (rv != -1)
438 dup2mask |= 1<<newd;
439 } else {
440 host_dup2 = syscalls[DUALCALL_DUP2].bs_host;
441 rv = host_dup2(oldd, newd);
442 }
443
444 return rv;
445 }
446
/*
 * We just wrap fork with the appropriate rump client calls to
 * preserve the file descriptors of the forked parent in the child,
 * but prevent double use of connection fd.
 *
 * Returns what the host fork() returns, except that -1 is returned
 * when rumpclient_prefork() fails (no fork is attempted) and, in the
 * child, when rumpclient_fork_init() fails.
 */
pid_t
fork(void)
{
	struct rumpclient_fork *rf;
	pid_t rv;

	DPRINTF(("fork\n"));

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch ((rv = host_fork())) {
	case -1:
		/* XXX: cancel rf */
		break;
	case 0:
		/* child: set up rump client state from rf */
		if (rumpclient_fork_init(rf) == -1)
			rv = -1;
		break;
	default:
		break;
	}

	DPRINTF(("fork returns %d\n", rv));
	return rv;
}
478
/*
 * daemon(3) replacement: bracket the host daemon() with the rump
 * client prefork/fork_init calls.  Returns 0 on success and -1 as
 * soon as any of the three steps fails.
 */
int
daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf = rumpclient_prefork();

	if (rf == NULL ||
	    host_daemon(nochdir, noclose) == -1 ||
	    rumpclient_fork_init(rf) == -1)
		return -1;

	return 0;
}
495
/*
 * select is done by calling poll.
 *
 * Every descriptor below nfds that is a member of at least one of the
 * given sets becomes one pollfd entry: readfds maps to POLLIN,
 * writefds to POLLOUT and exceptfds to POLLHUP|POLLERR.  The vector
 * is handed to REALPOLLTS() and the reported events are mapped back
 * into the sets.
 *
 * Returns the value of REALPOLLTS(), or -1 if the vector cannot be
 * allocated.  On error the sets are left untouched; on timeout
 * (return value 0) they are cleared, as select callers expect.
 */
int
REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t realnfds = 0;
	nfds_t j;
	int i, rv, sverrno;

	DPRINTF(("select\n"));

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_maxopen_fds.
	 * Seems to be fixed in current, thank the maker.
	 * god damn cluster...bomb.
	 */
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill the vector.  An entry is touched only for descriptors
	 * that are actually selected: writing pfds[j] unconditionally
	 * would dereference NULL when nothing is selected and run past
	 * the end of the allocation after the last selected descriptor.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		short events = 0;

		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = REALPOLLTS(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  whee.
	 * This is done for a timeout as well, so that no stale bits
	 * remain set when 0 is returned.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	/* keep the errno of the poll call visible to the caller */
	sverrno = errno;
	free(pfds);
	errno = sverrno;
	return rv;
}
608
/*
 * Count how many entries of a poll vector refer to the host and how
 * many to the rump kernel.  Entries with fd == -1 are skipped.  The
 * counters are incremented, not reset, so the caller initializes them.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t n;
	int fd;

	for (n = 0; n != nfds; n++) {
		fd = fds[n].fd;
		if (fd != -1)
			*(fd_isrump(fd) ? rumpcall : hostcall) += 1;
	}
}
624
/*
 * Rewrite the fd member of every entry of a poll vector in place by
 * running it through the translation function fdadj.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	nfds_t n = 0;

	while (n < nfds) {
		fds[n].fd = fdadj(fds[n].fd);
		n++;
	}
}
634
/*
 * poll is easy as long as the call comes in the fds only in one
 * kernel.  otherwise its quite tricky...
 */

/* Argument block handed to the hostpoll() thread. */
struct pollarg {
	struct pollfd *pfds;		/* vector polled on the host */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout passed to pollts */
	const sigset_t *sigmask;	/* signal mask passed to pollts */
	int pipefd;			/* rump fd written when host poll returns */
	int errnum;			/* errno of the host poll, set on failure */
};
647
648 static void *
649 hostpoll(void *arg)
650 {
651 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
652 const sigset_t *);
653 struct pollarg *parg = arg;
654 intptr_t rv;
655
656 op_pollts = syscalls[DUALCALL_POLLTS].bs_host;
657 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
658 if (rv == -1)
659 parg->errnum = errno;
660 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
661
662 return (void *)(intptr_t)rv;
663 }
664
665 int
666 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
667 const sigset_t *sigmask)
668 {
669 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
670 const sigset_t *);
671 int (*host_close)(int);
672 int hostcall = 0, rumpcall = 0;
673 pthread_t pt;
674 nfds_t i;
675 int rv;
676
677 DPRINTF(("poll\n"));
678 checkpoll(fds, nfds, &hostcall, &rumpcall);
679
680 if (hostcall && rumpcall) {
681 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
682 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
683 struct pollarg parg;
684 uintptr_t lrv;
685 int sverrno = 0, trv;
686
687 /*
688 * ok, this is where it gets tricky. We must support
689 * this since it's a very common operation in certain
690 * types of software (telnet, netcat, etc). We allocate
691 * two vectors and run two poll commands in separate
692 * threads. Whichever returns first "wins" and the
693 * other kernel's fds won't show activity.
694 */
695 rv = -1;
696
697 /* allocate full vector for O(n) joining after call */
698 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
699 if (!pfd_host)
700 goto out;
701 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
702 if (!pfd_rump) {
703 goto out;
704 }
705
706 /* split vectors */
707 for (i = 0; i < nfds; i++) {
708 if (fds[i].fd == -1) {
709 pfd_host[i].fd = -1;
710 pfd_rump[i].fd = -1;
711 } else if (fd_isrump(fds[i].fd)) {
712 pfd_host[i].fd = -1;
713 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
714 pfd_rump[i].events = fds[i].events;
715 } else {
716 pfd_rump[i].fd = -1;
717 pfd_host[i].fd = fds[i].fd;
718 pfd_host[i].events = fds[i].events;
719 }
720 fds[i].revents = 0;
721 }
722
723 /*
724 * then, open two pipes, one for notifications
725 * to each kernel.
726 */
727 if (rump_sys_pipe(rpipe) == -1)
728 goto out;
729 if (pipe(hpipe) == -1)
730 goto out;
731
732 pfd_host[nfds].fd = hpipe[0];
733 pfd_host[nfds].events = POLLIN;
734 pfd_rump[nfds].fd = rpipe[0];
735 pfd_rump[nfds].events = POLLIN;
736
737 /*
738 * then, create a thread to do host part and meanwhile
739 * do rump kernel part right here
740 */
741
742 parg.pfds = pfd_host;
743 parg.nfds = nfds+1;
744 parg.ts = ts;
745 parg.sigmask = sigmask;
746 parg.pipefd = rpipe[1];
747 pthread_create(&pt, NULL, hostpoll, &parg);
748
749 op_pollts = syscalls[DUALCALL_POLLTS].bs_rump;
750 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
751 sverrno = errno;
752 write(hpipe[1], &rv, sizeof(rv));
753 pthread_join(pt, (void *)&trv);
754
755 /* check who "won" and merge results */
756 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
757 rv = trv;
758
759 for (i = 0; i < nfds; i++) {
760 if (pfd_rump[i].fd != -1)
761 fds[i].revents = pfd_rump[i].revents;
762 }
763 sverrno = parg.errnum;
764 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
765 rv = trv;
766
767 for (i = 0; i < nfds; i++) {
768 if (pfd_host[i].fd != -1)
769 fds[i].revents = pfd_host[i].revents;
770 }
771 } else {
772 rv = 0;
773 }
774
775 out:
776 host_close = syscalls[DUALCALL_CLOSE].bs_host;
777 if (rpipe[0] != -1)
778 rump_sys_close(rpipe[0]);
779 if (rpipe[1] != -1)
780 rump_sys_close(rpipe[1]);
781 if (hpipe[0] != -1)
782 host_close(hpipe[0]);
783 if (hpipe[1] != -1)
784 host_close(hpipe[1]);
785 free(pfd_host);
786 free(pfd_rump);
787 errno = sverrno;
788 } else {
789 if (hostcall) {
790 op_pollts = syscalls[DUALCALL_POLLTS].bs_host;
791 } else {
792 op_pollts = syscalls[DUALCALL_POLLTS].bs_rump;
793 adjustpoll(fds, nfds, fd_host2rump);
794 }
795
796 rv = op_pollts(fds, nfds, ts, sigmask);
797 if (rumpcall)
798 adjustpoll(fds, nfds, fd_rump2host);
799 }
800
801 return rv;
802 }
803
804 int
805 poll(struct pollfd *fds, nfds_t nfds, int timeout)
806 {
807 struct timespec ts;
808 struct timespec *tsp = NULL;
809
810 if (timeout != INFTIM) {
811 ts.tv_sec = timeout / 1000;
812 ts.tv_nsec = (timeout % 1000) * 1000*1000;
813
814 tsp = &ts;
815 }
816
817 return REALPOLLTS(fds, nfds, tsp, NULL);
818 }
819
/*
 * kqueue is not supported: always fails with ENOSYS.  A notice is
 * printed if stderr is a terminal and is not marked as dup2()'d.
 */
int
kqueue(void)
{
	const bool canwarn =
	    !ISDUP2D(STDERR_FILENO) && isatty(STDERR_FILENO);

	if (canwarn)
		fprintf(stderr, "rumphijack: kqueue currently unsupported\n");

	errno = ENOSYS;
	return -1;
}
830
/*
 * kevent replacement: prints a message and aborts.  kqueue() in this
 * file never returns a descriptor, so no valid kq can reach here.
 */
/*ARGSUSED*/
int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{

	fputs("kevent impossible\n", stderr);
	abort();
	/*NOTREACHED*/
}
842
843 /*
844 * Rest are std type calls.
845 */
846
847 FDCALL(int, bind, DUALCALL_BIND, \
848 (int fd, const struct sockaddr *name, socklen_t namelen), \
849 (int, const struct sockaddr *, socklen_t), \
850 (fd, name, namelen))
851
852 FDCALL(int, connect, DUALCALL_CONNECT, \
853 (int fd, const struct sockaddr *name, socklen_t namelen), \
854 (int, const struct sockaddr *, socklen_t), \
855 (fd, name, namelen))
856
857 FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \
858 (int fd, struct sockaddr *name, socklen_t *namelen), \
859 (int, struct sockaddr *, socklen_t *), \
860 (fd, name, namelen))
861
862 FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \
863 (int fd, struct sockaddr *name, socklen_t *namelen), \
864 (int, struct sockaddr *, socklen_t *), \
865 (fd, name, namelen))
866
867 FDCALL(int, listen, DUALCALL_LISTEN, \
868 (int fd, int backlog), \
869 (int, int), \
870 (fd, backlog))
871
872 FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \
873 (int fd, void *buf, size_t len, int flags, \
874 struct sockaddr *from, socklen_t *fromlen), \
875 (int, void *, size_t, int, struct sockaddr *, socklen_t *), \
876 (fd, buf, len, flags, from, fromlen))
877
878 FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \
879 (int fd, const void *buf, size_t len, int flags, \
880 const struct sockaddr *to, socklen_t tolen), \
881 (int, const void *, size_t, int, \
882 const struct sockaddr *, socklen_t), \
883 (fd, buf, len, flags, to, tolen))
884
885 FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG, \
886 (int fd, struct msghdr *msg, int flags), \
887 (int, struct msghdr *, int), \
888 (fd, msg, flags))
889
890 FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG, \
891 (int fd, const struct msghdr *msg, int flags), \
892 (int, const struct msghdr *, int), \
893 (fd, msg, flags))
894
895 FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \
896 (int fd, int level, int optn, void *optval, socklen_t *optlen), \
897 (int, int, int, void *, socklen_t *), \
898 (fd, level, optn, optval, optlen))
899
900 FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \
901 (int fd, int level, int optn, \
902 const void *optval, socklen_t optlen), \
903 (int, int, int, const void *, socklen_t), \
904 (fd, level, optn, optval, optlen))
905
906 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \
907 (int fd, int how), \
908 (int, int), \
909 (fd, how))
910
/*
 * With _FORTIFY_SOURCE enabled, STUB(fun) names a symbol through
 * __ssp_weak_name(), and readlink/getcwd stubs are provided that
 * simply forward to _sys_readlink/_sys_getcwd.  Otherwise STUB(fun)
 * is just fun and no stubs are defined.
 */
#if !(_FORTIFY_SOURCE > 0)
#define STUB(fun) fun
#else
#define STUB(fun) __ssp_weak_name(fun)

ssize_t _sys_readlink(const char * __restrict, char * __restrict, size_t);
char *_sys_getcwd(char *, size_t);

ssize_t
STUB(readlink)(const char * __restrict path, char * __restrict buf,
	size_t bufsiz)
{

	return _sys_readlink(path, buf, bufsiz);
}

char *
STUB(getcwd)(char *buf, size_t size)
{

	return _sys_getcwd(buf, size);
}
#endif
930
931 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \
932 (int fd, void *buf, size_t buflen), \
933 (int, void *, size_t), \
934 (fd, buf, buflen))
935
936 FDCALL(ssize_t, readv, DUALCALL_READV, \
937 (int fd, const struct iovec *iov, int iovcnt), \
938 (int, const struct iovec *, int), \
939 (fd, iov, iovcnt))
940
941 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \
942 (int fd, const struct iovec *iov, int iovcnt), \
943 (int, const struct iovec *, int), \
944 (fd, iov, iovcnt))
945
946 FDCALL(int, close, DUALCALL_CLOSE, \
947 (int fd), \
948 (int), \
949 (fd))
950