hijack.c revision 1.29 1 /* $NetBSD: hijack.c,v 1.29 2011/02/07 10:28:18 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.29 2011/02/07 10:28:18 pooka Exp $");
30
31 #define __ssp_weak_name(fun) _hijack_ ## fun
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/event.h>
36 #include <sys/ioctl.h>
37 #include <sys/socket.h>
38 #include <sys/poll.h>
39
40 #include <rump/rumpclient.h>
41 #include <rump/rump_syscalls.h>
42
43 #include <assert.h>
44 #include <dlfcn.h>
45 #include <err.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <poll.h>
49 #include <pthread.h>
50 #include <signal.h>
51 #include <stdarg.h>
52 #include <stdbool.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <time.h>
57 #include <unistd.h>
58
/*
 * One enumerator per call that can be routed either to the host or to
 * the rump kernel.  The values index syscalls[], so they must stay
 * dense, and DUALCALL__NUM (the element count) must remain last.
 */
enum dualcall {
	DUALCALL_WRITE,
	DUALCALL_WRITEV,
	DUALCALL_IOCTL,
	DUALCALL_FCNTL,
	DUALCALL_SOCKET,
	DUALCALL_ACCEPT,
	DUALCALL_BIND,
	DUALCALL_CONNECT,
	DUALCALL_GETPEERNAME,
	DUALCALL_GETSOCKNAME,
	DUALCALL_LISTEN,
	DUALCALL_RECVFROM,
	DUALCALL_RECVMSG,
	DUALCALL_SENDTO,
	DUALCALL_SENDMSG,
	DUALCALL_GETSOCKOPT,
	DUALCALL_SETSOCKOPT,
	DUALCALL_SHUTDOWN,
	DUALCALL_READ,
	DUALCALL_READV,
	DUALCALL_DUP2,
	DUALCALL_CLOSE,
	DUALCALL_POLLTS,
	DUALCALL__NUM
};
73
/*
 * RSYS_STRING adds one level of indirection so that its argument is
 * macro-expanded before being stringified.  RSYS_NAME(a) yields the
 * string form of RUMP_SYS_RENAME_<a> (presumably provided by
 * <rump/rump_syscalls.h> -- TODO confirm); it is used as the rump-side
 * symbol name in syscnames[].
 */
#define RSYS_STRING(a) __STRING(a)
#define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))

/*
 * Names under which select and pollts are interposed, chosen by
 * NetBSD version.
 *
 * Would be nice to get this automatically in sync with libc.
 * Also, this does not work for compat-using binaries!
 */
#if !__NetBSD_Prereq__(5,99,7)
#define REALSELECT select
#define REALPOLLTS pollts
#else
#define REALSELECT _sys___select50
#define REALPOLLTS _sys___pollts50
#endif

/* Prototypes for the two interposers defined later in this file. */
int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
int REALPOLLTS(struct pollfd *, nfds_t,
	const struct timespec *, const sigset_t *);
92
/* Stringify after macro expansion; only needed for REALPOLLTS below. */
#define S(a) __STRING(a)
/*
 * Symbol-name table: for each dualcall index, the symbol looked up for
 * the host implementation and the one looked up for the rump kernel
 * implementation.  rcinit() searches this table by scm_callnum, so the
 * order of the entries does not matter, but every index below
 * DUALCALL__NUM must appear.
 */
struct sysnames {
	enum dualcall scm_callnum;	/* index into syscalls[] */
	const char *scm_hostname;	/* host symbol name */
	const char *scm_rumpname;	/* rump kernel symbol name */
} syscnames[] = {
	{ DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) },
	{ DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) },
	{ DUALCALL_BIND, "bind", RSYS_NAME(BIND) },
	{ DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) },
	{ DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) },
	{ DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) },
	{ DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) },
	{ DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) },
	{ DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) },
	{ DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) },
	{ DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) },
	{ DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) },
	{ DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) },
	{ DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) },
	{ DUALCALL_READ, "read", RSYS_NAME(READ) },
	{ DUALCALL_READV, "readv", RSYS_NAME(READV) },
	{ DUALCALL_WRITE, "write", RSYS_NAME(WRITE) },
	{ DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) },
	{ DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) },
	{ DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) },
	{ DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) },
	{ DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) },
	{ DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) },
};
#undef S
124
/*
 * Resolved entry points, indexed by enum dualcall and filled in by
 * rcinit().  The pointers are stored untyped; each caller assigns them
 * to a function pointer of the appropriate signature before calling.
 */
struct bothsys {
	void *bs_host;	/* host implementation */
	void *bs_rump;	/* rump kernel implementation */
} syscalls[DUALCALL__NUM];
/* GETSYSCALL(host|rump, NAME): the stored pointer for DUALCALL_NAME. */
#define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which
130
/*
 * Host fork() and daemon(), resolved in rcinit() and called by the
 * fork()/daemon() wrappers in this file.
 */
pid_t (*host_fork)(void);
int (*host_daemon)(int, int);
133
/*
 * Bitmask of descriptor numbers onto which dup2() has placed a rump
 * kernel descriptor: bit n set means "fd n refers to the rump kernel".
 */
static unsigned dup2mask;

/*
 * Test whether fd is recorded in dup2mask.
 *
 * Descriptors that do not fit in the mask (negative, or at least as
 * large as the number of bits in dup2mask) can never have been recorded,
 * so they report 0.  The explicit range check matters: shifting by a
 * negative amount or by >= the width of the type is undefined, and this
 * predicate is applied to arbitrary descriptor numbers.
 */
static inline int
isdup2d(int fd)
{

	if (fd < 0 || fd >= (int)(sizeof(dup2mask) * 8))
		return 0;
	return (dup2mask & (1U << fd)) != 0;
}
#define ISDUP2D(fd) isdup2d(fd)
136
//#define DEBUGJACK
#ifdef DEBUGJACK
#define DPRINTF(x) mydprintf x
/*
 * Debug printf to stderr.  Output is suppressed when stderr has been
 * dup2()'d to a rump kernel descriptor.  Invoke through DPRINTF with a
 * doubly parenthesized argument list: DPRINTF(("fmt %d\n", val)).
 */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#else
/* Debugging disabled: DPRINTF expands to nothing. */
#define DPRINTF(x)
#endif
156
/*
 * Generate a wrapper for a call whose routing depends only on its
 * descriptor argument, which must be named "fd".
 *
 *   type   - return type of the wrapper
 *   name   - name of the generated function
 *   rcname - enum dualcall index used to look up the implementation
 *   args   - parenthesized parameter list of the wrapper
 *   proto  - parenthesized parameter types for the function pointer
 *   vars   - parenthesized argument list forwarded to the implementation
 *
 * If fd belongs to the rump kernel, the rump implementation is called
 * with fd translated by fd_host2rump(); otherwise the host
 * implementation is called with fd unchanged.
 */
#define FDCALL(type, name, rcname, args, proto, vars) \
type name args \
{ \
	type (*fun) proto; \
 \
	if (fd_isrump(fd)) { \
		fun = syscalls[rcname].bs_rump; \
		fd = fd_host2rump(fd); \
	} else { \
		fun = syscalls[rcname].bs_host; \
	} \
 \
	return fun vars; \
}
171
/*
 * Symbol lookup hook handed to librumpclient (installed into
 * rumpclient_dlsym by rcinit()) for the LD_PRELOAD case.  Doing the
 * dlsym() call from this library ensures the correct RTLD_NEXT.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
182
/*
 * low calorie sockets?
 * When true, __socket30() creates AF_LOCAL sockets in the host instead
 * of in the rump kernel.
 */
static bool hostlocalsockets = true;
185
186 static void __attribute__((constructor))
187 rcinit(void)
188 {
189 char buf[64];
190 extern void *(*rumpclient_dlsym)(void *, const char *);
191 unsigned i, j;
192
193 rumpclient_dlsym = hijackdlsym;
194 host_fork = dlsym(RTLD_NEXT, "fork");
195 host_daemon = dlsym(RTLD_NEXT, "daemon");
196
197 /*
198 * In theory cannot print anything during lookups because
199 * we might not have the call vector set up. so, the errx()
200 * is a bit of a strech, but it might work.
201 */
202
203 for (i = 0; i < DUALCALL__NUM; i++) {
204 /* build runtime O(1) access */
205 for (j = 0; j < __arraycount(syscnames); j++) {
206 if (syscnames[j].scm_callnum == i)
207 break;
208 }
209
210 if (j == __arraycount(syscnames))
211 errx(1, "rumphijack error: syscall pos %d missing", i);
212
213 syscalls[i].bs_host = dlsym(RTLD_NEXT,
214 syscnames[j].scm_hostname);
215 if (syscalls[i].bs_host == NULL)
216 errx(1, "hostcall %s not found missing",
217 syscnames[j].scm_hostname);
218
219 syscalls[i].bs_rump = dlsym(RTLD_NEXT,
220 syscnames[j].scm_rumpname);
221 if (syscalls[i].bs_rump == NULL)
222 errx(1, "rumpcall %s not found missing",
223 syscnames[j].scm_rumpname);
224 }
225
226 if (rumpclient_init() == -1)
227 err(1, "rumpclient init");
228
229 /* set client persistence level */
230 if (getenv_r("RUMPHIJACK_RETRY", buf, sizeof(buf)) == -1) {
231 if (errno == ERANGE)
232 err(1, "invalid RUMPHIJACK_RETRY");
233 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
234 } else {
235 if (strcmp(buf, "die") == 0)
236 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE);
237 else if (strcmp(buf, "inftime") == 0)
238 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
239 else if (strcmp(buf, "once") == 0)
240 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE);
241 else {
242 time_t timeout;
243
244 timeout = (time_t)strtoll(buf, NULL, 10);
245 if (timeout <= 0)
246 errx(1, "RUMPHIJACK_RETRY must be keyword "
247 "or a positive integer, got: %s", buf);
248
249 rumpclient_setconnretry(timeout);
250 }
251 }
252 }
253
/*
 * Descriptor numbering: descriptors returned by the rump kernel are
 * presented to the application shifted up by HIJACK_FDOFF, and any
 * descriptor at or above HIJACK_FDOFF is treated as a rump kernel one.
 *
 * XXX: need runtime selection. low for now due to FD_SETSIZE
 */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */
258 static int
259 fd_rump2host(int fd)
260 {
261
262 if (fd == -1)
263 return fd;
264
265 if (!ISDUP2D(fd))
266 fd += HIJACK_FDOFF;
267
268 return fd;
269 }
270
271 static int
272 fd_host2rump(int fd)
273 {
274
275 if (!ISDUP2D(fd))
276 fd -= HIJACK_FDOFF;
277 return fd;
278 }
279
280 static bool
281 fd_isrump(int fd)
282 {
283
284 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
285 }
286
/* Assert that _fd_ is a rump kernel descriptor from the application's view. */
#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
/* The offset is private to the translation helpers above. */
#undef HIJACK_FDOFF
289
290 int __socket30(int, int, int);
291 int
292 __socket30(int domain, int type, int protocol)
293 {
294 int (*op_socket)(int, int, int);
295 int fd;
296 bool dohost;
297
298 dohost = hostlocalsockets && (domain == AF_LOCAL);
299
300 if (dohost)
301 op_socket = GETSYSCALL(host, SOCKET);
302 else
303 op_socket = GETSYSCALL(rump, SOCKET);
304 fd = op_socket(domain, type, protocol);
305
306 if (!dohost)
307 fd = fd_rump2host(fd);
308 DPRINTF(("socket <- %d\n", fd));
309
310 return fd;
311 }
312
313 int
314 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
315 {
316 int (*op_accept)(int, struct sockaddr *, socklen_t *);
317 int fd;
318 bool isrump;
319
320 isrump = fd_isrump(s);
321
322 DPRINTF(("accept -> %d", s));
323 if (isrump) {
324 op_accept = GETSYSCALL(rump, ACCEPT);
325 s = fd_host2rump(s);
326 } else {
327 op_accept = GETSYSCALL(host, ACCEPT);
328 }
329 fd = op_accept(s, addr, addrlen);
330 if (fd != -1 && isrump)
331 fd = fd_rump2host(fd);
332
333 DPRINTF((" <- %d\n", fd));
334
335 return fd;
336 }
337
338 /*
339 * ioctl and fcntl are varargs calls and need special treatment
340 */
341 int
342 ioctl(int fd, unsigned long cmd, ...)
343 {
344 int (*op_ioctl)(int, unsigned long cmd, ...);
345 va_list ap;
346 int rv;
347
348 DPRINTF(("ioctl -> %d\n", fd));
349 if (fd_isrump(fd)) {
350 fd = fd_host2rump(fd);
351 op_ioctl = GETSYSCALL(rump, IOCTL);
352 } else {
353 op_ioctl = GETSYSCALL(host, IOCTL);
354 }
355
356 va_start(ap, cmd);
357 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
358 va_end(ap);
359 return rv;
360 }
361
362 int
363 fcntl(int fd, int cmd, ...)
364 {
365 int (*op_fcntl)(int, int, ...);
366 va_list ap;
367 int rv;
368
369 DPRINTF(("fcntl -> %d\n", fd));
370 if (fd_isrump(fd)) {
371 fd = fd_host2rump(fd);
372 op_fcntl = GETSYSCALL(rump, FCNTL);
373 } else {
374 op_fcntl = GETSYSCALL(host, FCNTL);
375 }
376
377 va_start(ap, cmd);
378 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
379 va_end(ap);
380 return rv;
381 }
382
383 /*
384 * write cannot issue a standard debug printf due to recursion
385 */
386 ssize_t
387 write(int fd, const void *buf, size_t blen)
388 {
389 ssize_t (*op_write)(int, const void *, size_t);
390
391 if (fd_isrump(fd)) {
392 fd = fd_host2rump(fd);
393 op_write = GETSYSCALL(rump, WRITE);
394 } else {
395 op_write = GETSYSCALL(host, WRITE);
396 }
397
398 return op_write(fd, buf, blen);
399 }
400
401 /*
402 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
403 * many programs do that. dup2 of a rump kernel fd to another value
404 * not >= fdoff is an error.
405 *
406 * Note: cannot rump2host newd, because it is often hardcoded.
407 */
408 int
409 dup2(int oldd, int newd)
410 {
411 int (*host_dup2)(int, int);
412 int rv;
413
414 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
415
416 if (fd_isrump(oldd)) {
417 if (!(newd >= 0 && newd <= 2))
418 return EBADF;
419 oldd = fd_host2rump(oldd);
420 rv = rump_sys_dup2(oldd, newd);
421 if (rv != -1)
422 dup2mask |= 1<<newd;
423 } else {
424 host_dup2 = syscalls[DUALCALL_DUP2].bs_host;
425 rv = host_dup2(oldd, newd);
426 }
427
428 return rv;
429 }
430
/*
 * fork(): the host fork is bracketed by the rump client calls
 * rumpclient_prefork() (before, in the parent) and
 * rumpclient_fork_init() (after, in the child) so that the child keeps
 * the parent's file descriptors without both processes using the same
 * connection fd.
 *
 * Returns the host fork's result, or -1 if rumpclient_prefork() fails
 * or if rumpclient_fork_init() fails in the child.
 */
pid_t
fork()
{
	struct rumpclient_fork *rf;
	pid_t pid;

	DPRINTF(("fork\n"));

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	pid = host_fork();
	if (pid == 0) {
		/* child */
		if (rumpclient_fork_init(rf) == -1)
			pid = -1;
	}
	/* XXX: on host fork failure (-1) rf should be cancelled */

	DPRINTF(("fork returns %d\n", pid));
	return pid;
}
462
/*
 * daemon(): same bracketing as fork() -- rumpclient_prefork() first,
 * then the host daemon(), then rumpclient_fork_init().
 * Returns 0 on success and -1 as soon as any of the three steps fails.
 */
int
daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	if (host_daemon(nochdir, noclose) == -1 ||
	    rumpclient_fork_init(rf) == -1)
		return -1;

	return 0;
}
479
/*
 * select is done by calling poll.
 *
 * The fd_sets are converted into a pollfd vector (one entry per
 * descriptor that is a member of at least one set), REALPOLLTS() is
 * called on it, and the results are converted back:
 *   readfds   <-> POLLIN
 *   writefds  <-> POLLOUT
 *   exceptfds <-> POLLHUP|POLLERR
 *
 * Returns whatever REALPOLLTS() returned, i.e. the number of ready
 * descriptors rather than the number of set bits, 0 on timeout and -1
 * on error.  On error the sets are left untouched; on timeout they are
 * cleared.  Also returns -1 if the pollfd vector cannot be allocated.
 */
int
REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds;
	struct timespec ts, *tsp = NULL;
	nfds_t realnfds;
	int i, j;
	int rv;

	DPRINTF(("select\n"));

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_maxopen_fds.
	 * Seems to be fixed in current.
	 */
	for (i = 0, realnfds = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	} else {
		pfds = NULL;
	}

	/*
	 * Fill in the vector.  An entry is written only for descriptors
	 * that are actually selected: pfds has exactly realnfds slots
	 * (and is NULL when nothing is selected), so touching pfds[j]
	 * for an unselected descriptor would run off its end.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		short events = 0;

		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = REALPOLLTS(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  This is done
	 * for a timeout (rv == 0) as well, since select reports a
	 * timeout with all sets empty.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (i = 0; i < (int)realnfds; i++) {
		if (readfds && (pfds[i].revents & POLLIN))
			FD_SET(pfds[i].fd, readfds);
		if (writefds && (pfds[i].revents & POLLOUT))
			FD_SET(pfds[i].fd, writefds);
		if (exceptfds && (pfds[i].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[i].fd, exceptfds);
	}

 out:
	free(pfds);
	return rv;
}
592
/*
 * Count how many entries of fds refer to the rump kernel and how many
 * to the host, adding to *rumpcall and *hostcall respectively.
 * Entries with fd == -1 are not counted.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t k;

	for (k = 0; k != nfds; k++) {
		int *counter;

		if (fds[k].fd == -1)
			continue;

		counter = fd_isrump(fds[k].fd) ? rumpcall : hostcall;
		++*counter;
	}
}
608
/*
 * Rewrite the fd member of each of the nfds entries in fds with the
 * result of applying fdadj to it.  Other members are not touched.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	struct pollfd *cur = fds;
	nfds_t left = nfds;

	while (left > 0) {
		cur->fd = fdadj(cur->fd);
		cur++;
		left--;
	}
}
618
/*
 * poll is easy as long as the fds in the call all belong to one
 * kernel.  otherwise it's quite tricky...
 *
 * struct pollarg carries the arguments for, and the error result of,
 * the host-side poll that hostpoll() runs in a separate thread.
 */
struct pollarg {
	struct pollfd *pfds;		/* vector to poll in the host */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout, passed through */
	const sigset_t *sigmask;	/* signal mask, passed through */
	int pipefd;			/* rump kernel fd written when done */
	int errnum;			/* errno of the poll if it returned -1 */
};
631
632 static void *
633 hostpoll(void *arg)
634 {
635 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
636 const sigset_t *);
637 struct pollarg *parg = arg;
638 intptr_t rv;
639
640 op_pollts = syscalls[DUALCALL_POLLTS].bs_host;
641 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
642 if (rv == -1)
643 parg->errnum = errno;
644 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
645
646 return (void *)(intptr_t)rv;
647 }
648
649 int
650 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
651 const sigset_t *sigmask)
652 {
653 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
654 const sigset_t *);
655 int (*host_close)(int);
656 int hostcall = 0, rumpcall = 0;
657 pthread_t pt;
658 nfds_t i;
659 int rv;
660
661 DPRINTF(("poll\n"));
662 checkpoll(fds, nfds, &hostcall, &rumpcall);
663
664 if (hostcall && rumpcall) {
665 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
666 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
667 struct pollarg parg;
668 uintptr_t lrv;
669 int sverrno = 0, trv;
670
671 /*
672 * ok, this is where it gets tricky. We must support
673 * this since it's a very common operation in certain
674 * types of software (telnet, netcat, etc). We allocate
675 * two vectors and run two poll commands in separate
676 * threads. Whichever returns first "wins" and the
677 * other kernel's fds won't show activity.
678 */
679 rv = -1;
680
681 /* allocate full vector for O(n) joining after call */
682 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
683 if (!pfd_host)
684 goto out;
685 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
686 if (!pfd_rump) {
687 goto out;
688 }
689
690 /* split vectors */
691 for (i = 0; i < nfds; i++) {
692 if (fds[i].fd == -1) {
693 pfd_host[i].fd = -1;
694 pfd_rump[i].fd = -1;
695 } else if (fd_isrump(fds[i].fd)) {
696 pfd_host[i].fd = -1;
697 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
698 pfd_rump[i].events = fds[i].events;
699 } else {
700 pfd_rump[i].fd = -1;
701 pfd_host[i].fd = fds[i].fd;
702 pfd_host[i].events = fds[i].events;
703 }
704 fds[i].revents = 0;
705 }
706
707 /*
708 * then, open two pipes, one for notifications
709 * to each kernel.
710 */
711 if (rump_sys_pipe(rpipe) == -1)
712 goto out;
713 if (pipe(hpipe) == -1)
714 goto out;
715
716 pfd_host[nfds].fd = hpipe[0];
717 pfd_host[nfds].events = POLLIN;
718 pfd_rump[nfds].fd = rpipe[0];
719 pfd_rump[nfds].events = POLLIN;
720
721 /*
722 * then, create a thread to do host part and meanwhile
723 * do rump kernel part right here
724 */
725
726 parg.pfds = pfd_host;
727 parg.nfds = nfds+1;
728 parg.ts = ts;
729 parg.sigmask = sigmask;
730 parg.pipefd = rpipe[1];
731 pthread_create(&pt, NULL, hostpoll, &parg);
732
733 op_pollts = syscalls[DUALCALL_POLLTS].bs_rump;
734 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
735 sverrno = errno;
736 write(hpipe[1], &rv, sizeof(rv));
737 pthread_join(pt, (void *)&trv);
738
739 /* check who "won" and merge results */
740 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
741 rv = trv;
742
743 for (i = 0; i < nfds; i++) {
744 if (pfd_rump[i].fd != -1)
745 fds[i].revents = pfd_rump[i].revents;
746 }
747 sverrno = parg.errnum;
748 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
749 rv = trv;
750
751 for (i = 0; i < nfds; i++) {
752 if (pfd_host[i].fd != -1)
753 fds[i].revents = pfd_host[i].revents;
754 }
755 } else {
756 rv = 0;
757 }
758
759 out:
760 host_close = syscalls[DUALCALL_CLOSE].bs_host;
761 if (rpipe[0] != -1)
762 rump_sys_close(rpipe[0]);
763 if (rpipe[1] != -1)
764 rump_sys_close(rpipe[1]);
765 if (hpipe[0] != -1)
766 host_close(hpipe[0]);
767 if (hpipe[1] != -1)
768 host_close(hpipe[1]);
769 free(pfd_host);
770 free(pfd_rump);
771 errno = sverrno;
772 } else {
773 if (hostcall) {
774 op_pollts = syscalls[DUALCALL_POLLTS].bs_host;
775 } else {
776 op_pollts = syscalls[DUALCALL_POLLTS].bs_rump;
777 adjustpoll(fds, nfds, fd_host2rump);
778 }
779
780 rv = op_pollts(fds, nfds, ts, sigmask);
781 if (rumpcall)
782 adjustpoll(fds, nfds, fd_rump2host);
783 }
784
785 return rv;
786 }
787
788 int
789 poll(struct pollfd *fds, nfds_t nfds, int timeout)
790 {
791 struct timespec ts;
792 struct timespec *tsp = NULL;
793
794 if (timeout != INFTIM) {
795 ts.tv_sec = timeout / 1000;
796 ts.tv_nsec = (timeout % 1000) * 1000*1000;
797
798 tsp = &ts;
799 }
800
801 return REALPOLLTS(fds, nfds, tsp, NULL);
802 }
803
/*
 * kqueue(): not supported.  Always fails with -1 and errno ENOSYS.
 * A notice is printed to stderr if stderr is a terminal and has not
 * been dup2()'d to a rump kernel descriptor.
 */
int
kqueue(void)
{

	if (!ISDUP2D(STDERR_FILENO)) {
		if (isatty(STDERR_FILENO))
			fprintf(stderr,
			    "rumphijack: kqueue currently unsupported\n");
	}

	errno = ENOSYS;
	return -1;
}
814
/*
 * kevent(): kqueue() in this file never succeeds, so reaching this is
 * treated as fatal: a message is printed to stderr and the process
 * aborts.  None of the arguments are used.
 */
/*ARGSUSED*/
int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{

	fputs("kevent impossible\n", stderr);
	abort();
	/*NOTREACHED*/
}
826
/*
 * Rest are std type calls: each wrapper below is generated by FDCALL
 * and dispatches to the host or the rump kernel implementation based
 * solely on its "fd" argument, passing all other arguments through
 * unchanged.
 */

/* socket address binding / connection setup */
FDCALL(int, bind, DUALCALL_BIND, \
	(int fd, const struct sockaddr *name, socklen_t namelen), \
	(int, const struct sockaddr *, socklen_t), \
	(fd, name, namelen))

FDCALL(int, connect, DUALCALL_CONNECT, \
	(int fd, const struct sockaddr *name, socklen_t namelen), \
	(int, const struct sockaddr *, socklen_t), \
	(fd, name, namelen))

/* address queries */
FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \
	(int fd, struct sockaddr *name, socklen_t *namelen), \
	(int, struct sockaddr *, socklen_t *), \
	(fd, name, namelen))

FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \
	(int fd, struct sockaddr *name, socklen_t *namelen), \
	(int, struct sockaddr *, socklen_t *), \
	(fd, name, namelen))

FDCALL(int, listen, DUALCALL_LISTEN, \
	(int fd, int backlog), \
	(int, int), \
	(fd, backlog))

/* data transfer */
FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \
	(int fd, void *buf, size_t len, int flags, \
	    struct sockaddr *from, socklen_t *fromlen), \
	(int, void *, size_t, int, struct sockaddr *, socklen_t *), \
	(fd, buf, len, flags, from, fromlen))

FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \
	(int fd, const void *buf, size_t len, int flags, \
	    const struct sockaddr *to, socklen_t tolen), \
	(int, const void *, size_t, int, \
	    const struct sockaddr *, socklen_t), \
	(fd, buf, len, flags, to, tolen))

FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG, \
	(int fd, struct msghdr *msg, int flags), \
	(int, struct msghdr *, int), \
	(fd, msg, flags))

FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG, \
	(int fd, const struct msghdr *msg, int flags), \
	(int, const struct msghdr *, int), \
	(fd, msg, flags))

/* socket options and teardown */
FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \
	(int fd, int level, int optn, void *optval, socklen_t *optlen), \
	(int, int, int, void *, socklen_t *), \
	(fd, level, optn, optval, optlen))

FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \
	(int fd, int level, int optn, \
	    const void *optval, socklen_t optlen), \
	(int, int, int, const void *, socklen_t), \
	(fd, level, optn, optval, optlen))

FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \
	(int fd, int how), \
	(int, int), \
	(fd, how))
894
/*
 * STUB(fun) is the name under which a wrapper is defined.  With
 * _FORTIFY_SOURCE enabled it is __ssp_weak_name(fun), which this file
 * defines as _hijack_<fun>; otherwise it is plain <fun>.
 * NOTE(review): presumably the ssp headers refer to the fortified
 * functions via __ssp_weak_name -- confirm against <ssp/ssp.h>.
 *
 * In the fortified case readlink and getcwd stubs are provided as
 * well; they just forward to the _sys_ variants.
 */
#if _FORTIFY_SOURCE > 0
#define STUB(fun) __ssp_weak_name(fun)
ssize_t _sys_readlink(const char * __restrict, char * __restrict, size_t);
ssize_t
STUB(readlink)(const char * __restrict path, char * __restrict buf,
	size_t bufsiz)
{
	return _sys_readlink(path, buf, bufsiz);
}

char *_sys_getcwd(char *, size_t);
char *
STUB(getcwd)(char *buf, size_t size)
{
	return _sys_getcwd(buf, size);
}
#else
#define STUB(fun) fun
#endif
914
/*
 * Plain descriptor I/O wrappers generated by FDCALL.  read is defined
 * under its STUB() name; the others under their own names.
 */
FDCALL(ssize_t, STUB(read), DUALCALL_READ, \
	(int fd, void *buf, size_t buflen), \
	(int, void *, size_t), \
	(fd, buf, buflen))

FDCALL(ssize_t, readv, DUALCALL_READV, \
	(int fd, const struct iovec *iov, int iovcnt), \
	(int, const struct iovec *, int), \
	(fd, iov, iovcnt))

FDCALL(ssize_t, writev, DUALCALL_WRITEV, \
	(int fd, const struct iovec *iov, int iovcnt), \
	(int, const struct iovec *, int), \
	(fd, iov, iovcnt))

FDCALL(int, close, DUALCALL_CLOSE, \
	(int fd), \
	(int), \
	(fd))
934