/*	$NetBSD: hijack.c,v 1.7 2011/01/09 19:56:33 pooka Exp $	*/

/*-
 * Copyright (c) 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.7 2011/01/09 19:56:33 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/ioctl.h>
34 #include <sys/socket.h>
35 #include <sys/poll.h>
36
37 #include <rump/rump.h>
38 #include <rump/rumpclient.h>
39 #include <rump/rump_syscalls.h>
40
41 #include <assert.h>
42 #include <dlfcn.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <poll.h>
47 #include <pthread.h>
48 #include <signal.h>
49 #include <stdarg.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <time.h>
53 #include <unistd.h>
54
/*
 * Indices into rumpcalls[] and sysnames[].  The order here must match
 * the order of the strings in sysnames[]; RUMPCALL__NUM is the number
 * of entries and must stay last.
 */
enum {
	/* socket calls */
	RUMPCALL_SOCKET,
	RUMPCALL_ACCEPT,
	RUMPCALL_BIND,
	RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME,
	RUMPCALL_GETSOCKNAME,
	RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM,
	RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO,
	RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT,
	RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,

	/* generic descriptor calls */
	RUMPCALL_READ,
	RUMPCALL_READV,
	RUMPCALL_WRITE,
	RUMPCALL_WRITEV,
	RUMPCALL_IOCTL,
	RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,

	RUMPCALL__NUM
};
68
/*
 * System call names, one per RUMPCALL_* index and in the same order.
 * rcinit() prefixes each name with "rump_sys_" and looks the result up
 * in librumpclient.
 */
const char *sysnames[] = {
	"__socket30",	/*  0 */
	"accept",	/*  1 */
	"bind",		/*  2 */
	"connect",	/*  3 */
	"getpeername",	/*  4 */
	"getsockname",	/*  5 */
	"listen",	/*  6 */
	"recvfrom",	/*  7 */
	"recvmsg",	/*  8 */
	"sendto",	/*  9 */
	"sendmsg",	/* 10 */
	"getsockopt",	/* 11 */
	"setsockopt",	/* 12 */
	"shutdown",	/* 13 */
	"read",		/* 14 */
	"readv",	/* 15 */
	"write",	/* 16 */
	"writev",	/* 17 */
	"ioctl",	/* 18 */
	"fcntl",	/* 19 */
	"close",	/* 20 */
	"__pollts50",	/* 21 */
};
93
94 static int (*host_socket)(int, int, int);
95 static int (*host_connect)(int, const struct sockaddr *, socklen_t);
96 static int (*host_bind)(int, const struct sockaddr *, socklen_t);
97 static int (*host_listen)(int, int);
98 static int (*host_accept)(int, struct sockaddr *, socklen_t *);
99 static int (*host_getpeername)(int, struct sockaddr *, socklen_t *);
100 static int (*host_getsockname)(int, struct sockaddr *, socklen_t *);
101 static int (*host_setsockopt)(int, int, int, const void *, socklen_t);
102
103 static ssize_t (*host_read)(int, void *, size_t);
104 static ssize_t (*host_readv)(int, const struct iovec *, int);
105 static ssize_t (*host_write)(int, const void *, size_t);
106 static ssize_t (*host_writev)(int, const struct iovec *, int);
107 static int (*host_ioctl)(int, unsigned long, ...);
108 static int (*host_fcntl)(int, int, ...);
109 static int (*host_close)(int);
110 static int (*host_pollts)(struct pollfd *, nfds_t,
111 const struct timespec *, const sigset_t *);
112 static pid_t (*host_fork)(void);
113 static int (*host_dup2)(int, int);
114
115 static void *rumpcalls[RUMPCALL__NUM];
116
/*
 * dlsym() trampoline.  rcinit() stores the address of this function in
 * librumpclient's "rumpclient_dlsym" hook so that, when this library is
 * LD_PRELOADed, librumpclient's lookups are issued from here and
 * RTLD_NEXT resolves correctly.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
127
/*
 * When true, __socket30() creates AF_LOCAL sockets with the host's
 * socket call instead of the rump kernel's.  Off by default.
 */
static bool hostlocalsockets = false;
130
131 static void __attribute__((constructor))
132 rcinit(void)
133 {
134 int (*rumpcinit)(void);
135 void **rumpcdlsym;
136 void *hand;
137 int i;
138
139 hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
140 if (!hand)
141 err(1, "cannot open librumpclient.so");
142 rumpcinit = dlsym(hand, "rumpclient_init");
143 _DIAGASSERT(rumpcinit);
144
145 rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
146 *rumpcdlsym = hijackdlsym;
147
148 host_socket = dlsym(RTLD_NEXT, "__socket30");
149 host_listen = dlsym(RTLD_NEXT, "listen");
150 host_connect = dlsym(RTLD_NEXT, "connect");
151 host_bind = dlsym(RTLD_NEXT, "bind");
152 host_accept = dlsym(RTLD_NEXT, "accept");
153 host_getpeername = dlsym(RTLD_NEXT, "getpeername");
154 host_getsockname = dlsym(RTLD_NEXT, "getsockname");
155 host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
156
157 host_read = dlsym(RTLD_NEXT, "read");
158 host_readv = dlsym(RTLD_NEXT, "readv");
159 host_write = dlsym(RTLD_NEXT, "write");
160 host_writev = dlsym(RTLD_NEXT, "writev");
161 host_ioctl = dlsym(RTLD_NEXT, "ioctl");
162 host_fcntl = dlsym(RTLD_NEXT, "fcntl");
163 host_close = dlsym(RTLD_NEXT, "close");
164 host_pollts = dlsym(RTLD_NEXT, "pollts");
165 host_fork = dlsym(RTLD_NEXT, "fork");
166 host_dup2 = dlsym(RTLD_NEXT, "dup2");
167
168 for (i = 0; i < RUMPCALL__NUM; i++) {
169 char sysname[128];
170
171 snprintf(sysname, sizeof(sysname), "rump_sys_%s", sysnames[i]);
172 rumpcalls[i] = dlsym(hand, sysname);
173 if (!rumpcalls[i]) {
174 fprintf(stderr, "cannot find symbol: %s\n", sysname);
175 exit(1);
176 }
177 }
178
179 if (rumpcinit() == -1)
180 err(1, "rumpclient init");
181 }
182
/*
 * Record of low-numbered host descriptors that dup2() has redirected
 * to the rump kernel.  dup2() ORs (newd + 1) into the mask and
 * ISDUP2D(fd) is true when every bit of (fd + 1) is set in the mask.
 *
 * Note: for fd == -1 the pattern is 0, so ISDUP2D(-1) is always true.
 *
 * The argument is fully parenthesized so that an expression such as
 * "a & b" can be passed safely.
 */
static unsigned dup2mask;
#define ISDUP2D(fd) ((((fd)+1) & dup2mask) == ((fd)+1))
185
/*
 * Debug tracing.  Define DEBUGJACK to have DPRINTF((fmt, ...)) print to
 * stderr; without it DPRINTF() expands to nothing and its arguments are
 * not evaluated.
 */
/* #define DEBUGJACK */
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list ap;

	/* stay silent once stderr itself has been dup2()'d */
	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(ap, fmt);
		vfprintf(stderr, fmt, ap);
		va_end(ap);
	}
}
#endif /* DEBUGJACK */
205
/* XXX: need runtime selection.  low for now due to FD_SETSIZE */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */

/*
 * Translate a descriptor returned by the rump kernel into the number
 * handed to the application: rump descriptors are offset by
 * HIJACK_FDOFF.  The error value -1 and descriptors flagged by
 * ISDUP2D() pass through unchanged.
 */
static int
fd_rump2host(int fd)
{

	if (fd != -1 && !ISDUP2D(fd))
		return fd + HIJACK_FDOFF;
	return fd;
}
222
223 static int
224 fd_host2rump(int fd)
225 {
226
227 if (!ISDUP2D(fd))
228 fd -= HIJACK_FDOFF;
229 return fd;
230 }
231
232 static bool
233 fd_isrump(int fd)
234 {
235
236 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
237 }
238
/*
 * Used by the wrappers that have no host fallback: abort (via assert)
 * unless the descriptor is flagged by ISDUP2D() or is at least
 * HIJACK_ASSERT.
 */
#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)

/* the raw offset is not to be used past this point; use the fd_*() helpers */
#undef HIJACK_FDOFF
241
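/*
 * Socket call wrappers.  The ones that check fd_isrump() dispatch to
 * the rump kernel or to the host depending on the descriptor; the ones
 * guarded by assertfd() support rump kernel descriptors only.
 */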
245
246 int __socket30(int, int, int);
247 int
248 __socket30(int domain, int type, int protocol)
249 {
250 int (*rc_socket)(int, int, int);
251 int fd;
252 bool dohost;
253
254 dohost = hostlocalsockets && (domain == AF_LOCAL);
255
256 if (dohost)
257 rc_socket = host_socket;
258 else
259 rc_socket = rumpcalls[RUMPCALL_SOCKET];
260 fd = rc_socket(domain, type, protocol);
261
262 if (!dohost)
263 fd = fd_rump2host(fd);
264 DPRINTF(("socket <- %d\n", fd));
265
266 return fd;
267 }
268
269 int
270 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
271 {
272 int (*rc_accept)(int, struct sockaddr *, socklen_t *);
273 int fd;
274 bool isrump;
275
276 isrump = fd_isrump(s);
277
278 DPRINTF(("accept -> %d", s));
279 if (isrump) {
280 rc_accept = rumpcalls[RUMPCALL_ACCEPT];
281 s = fd_host2rump(s);
282 } else {
283 rc_accept = host_accept;
284 }
285 fd = rc_accept(s, addr, addrlen);
286 if (fd != -1 && isrump)
287 fd = fd_rump2host(fd);
288
289 DPRINTF((" <- %d\n", fd));
290
291 return fd;
292 }
293
294 int
295 bind(int s, const struct sockaddr *name, socklen_t namelen)
296 {
297 int (*rc_bind)(int, const struct sockaddr *, socklen_t);
298
299 DPRINTF(("bind -> %d\n", s));
300 if (fd_isrump(s)) {
301 rc_bind = rumpcalls[RUMPCALL_BIND];
302 s = fd_host2rump(s);
303 } else {
304 rc_bind = host_bind;
305 }
306 return rc_bind(s, name, namelen);
307 }
308
309 int
310 connect(int s, const struct sockaddr *name, socklen_t namelen)
311 {
312 int (*rc_connect)(int, const struct sockaddr *, socklen_t);
313
314 DPRINTF(("connect -> %d\n", s));
315 if (fd_isrump(s)) {
316 rc_connect = rumpcalls[RUMPCALL_CONNECT];
317 s = fd_host2rump(s);
318 } else {
319 rc_connect = host_connect;
320 }
321
322 return rc_connect(s, name, namelen);
323 }
324
325 int
326 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
327 {
328 int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
329
330 DPRINTF(("getpeername -> %d\n", s));
331 if (fd_isrump(s)) {
332 rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
333 s = fd_host2rump(s);
334 } else {
335 rc_getpeername = host_getpeername;
336 }
337 return rc_getpeername(s, name, namelen);
338 }
339
340 int
341 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
342 {
343 int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
344
345 DPRINTF(("getsockname -> %d\n", s));
346 if (fd_isrump(s)) {
347 rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
348 s = fd_host2rump(s);
349 } else {
350 rc_getsockname = host_getsockname;
351 }
352 return rc_getsockname(s, name, namelen);
353 }
354
355 int
356 listen(int s, int backlog)
357 {
358 int (*rc_listen)(int, int);
359
360 DPRINTF(("listen -> %d\n", s));
361 if (fd_isrump(s)) {
362 rc_listen = rumpcalls[RUMPCALL_LISTEN];
363 s = fd_host2rump(s);
364 } else {
365 rc_listen = host_listen;
366 }
367 return rc_listen(s, backlog);
368 }
369
/*
 * recv() is recvfrom() without a source address; forward to it so the
 * call goes through the same dispatch.
 */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{
	ssize_t nrecv;

	nrecv = recvfrom(s, buf, len, flags, NULL, NULL);
	return nrecv;
}
376
377 ssize_t
378 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
379 socklen_t *fromlen)
380 {
381 int (*rc_recvfrom)(int, void *, size_t, int,
382 struct sockaddr *, socklen_t *);
383
384 DPRINTF(("recvfrom\n"));
385 assertfd(s);
386 rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
387 return rc_recvfrom(fd_host2rump(s), buf, len, flags, from, fromlen);
388 }
389
390 ssize_t
391 recvmsg(int s, struct msghdr *msg, int flags)
392 {
393 int (*rc_recvmsg)(int, struct msghdr *, int);
394
395 DPRINTF(("recvmsg\n"));
396 assertfd(s);
397 rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
398 return rc_recvmsg(fd_host2rump(s), msg, flags);
399 }
400
/*
 * send() is sendto() without a destination address; forward to it so
 * the call goes through the same dispatch.
 */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{
	ssize_t nsent;

	nsent = sendto(s, buf, len, flags, NULL, 0);
	return nsent;
}
407
408 ssize_t
409 sendto(int s, const void *buf, size_t len, int flags,
410 const struct sockaddr *to, socklen_t tolen)
411 {
412 int (*rc_sendto)(int, const void *, size_t, int,
413 const struct sockaddr *, socklen_t);
414
415 if (s == -1)
416 return len;
417
418 DPRINTF(("sendto\n"));
419 assertfd(s);
420 rc_sendto = rumpcalls[RUMPCALL_SENDTO];
421 return rc_sendto(fd_host2rump(s), buf, len, flags, to, tolen);
422 }
423
424 ssize_t
425 sendmsg(int s, const struct msghdr *msg, int flags)
426 {
427 int (*rc_sendmsg)(int, const struct msghdr *, int);
428
429 DPRINTF(("sendmsg\n"));
430 assertfd(s);
431 rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
432 return rc_sendmsg(fd_host2rump(s), msg, flags);
433 }
434
435 int
436 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
437 {
438 int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
439
440 DPRINTF(("getsockopt -> %d\n", s));
441 assertfd(s);
442 rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
443 return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
444 }
445
446 int
447 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
448 {
449 int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
450
451 DPRINTF(("setsockopt -> %d\n", s));
452 if (fd_isrump(s)) {
453 rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
454 s = fd_host2rump(s);
455 } else {
456 rc_setsockopt = host_setsockopt;
457 }
458 return rc_setsockopt(s, level, optname, optval, optlen);
459 }
460
461 int
462 shutdown(int s, int how)
463 {
464 int (*rc_shutdown)(int, int);
465
466 DPRINTF(("shutdown -> %d\n", s));
467 assertfd(s);
468 rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
469 return rc_shutdown(fd_host2rump(s), how);
470 }
471
472 /*
473 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
474 * many programs do that. dup2 of a rump kernel fd to another value
475 * not >= fdoff is an error.
476 *
477 * Note: cannot rump2host newd, because it is often hardcoded.
478 *
479 * XXX: should disable debug prints after stdout/stderr are dup2'd
480 */
481 int
482 dup2(int oldd, int newd)
483 {
484 int rv;
485
486 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
487
488 if (fd_isrump(oldd)) {
489 if (!(newd >= 0 && newd <= 2))
490 return EBADF;
491 oldd = fd_host2rump(oldd);
492 rv = rump_sys_dup2(oldd, newd);
493 if (rv != -1)
494 dup2mask |= newd+1;
495 return rv;
496 } else {
497 return host_dup2(oldd, newd);
498 }
499 }
500
/*
 * fork() hijack.  The host fork is bracketed with the rump client
 * calls: rumpclient_prefork() before it and, in the child,
 * rumpclient_fork_init() with the state prefork returned.  The intent
 * is to preserve the parent's file descriptors in the child while
 * preventing double use of the connection fd.
 *
 * Returns what the host fork() returns, or -1 if rumpclient_prefork()
 * fails (no fork is attempted) or if rumpclient_fork_init() fails in
 * the child.
 */
pid_t
fork(void)
{
	struct rumpclient_fork *rf;
	pid_t rv;

	DPRINTF(("fork\n"));

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	rv = host_fork();
	if (rv == 0) {
		/* child: set up the rump client state */
		if (rumpclient_fork_init(rf) == -1)
			rv = -1;
	}
	/* XXX: on rv == -1 from host_fork(), rf should be cancelled */

	DPRINTF(("fork returns %d\n", rv));
	return rv;
}
533
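/*
 * Hybrids: generic descriptor calls.  Each wrapper dispatches to the
 * rump kernel or to the host depending on the descriptor it is given.
 */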
537
538 ssize_t
539 read(int fd, void *buf, size_t len)
540 {
541 int (*op_read)(int, void *, size_t);
542 ssize_t n;
543
544 DPRINTF(("read %d\n", fd));
545 if (fd_isrump(fd)) {
546 fd = fd_host2rump(fd);
547 op_read = rumpcalls[RUMPCALL_READ];
548 } else {
549 op_read = host_read;
550 }
551
552 n = op_read(fd, buf, len);
553 return n;
554 }
555
556 ssize_t
557 readv(int fd, const struct iovec *iov, int iovcnt)
558 {
559 int (*op_readv)(int, const struct iovec *, int);
560
561 DPRINTF(("readv %d\n", fd));
562 if (fd_isrump(fd)) {
563 fd = fd_host2rump(fd);
564 op_readv = rumpcalls[RUMPCALL_READV];
565 } else {
566 op_readv = host_readv;
567 }
568
569 return op_readv(fd, iov, iovcnt);
570 }
571
572 ssize_t
573 write(int fd, const void *buf, size_t len)
574 {
575 int (*op_write)(int, const void *, size_t);
576
577 if (fd_isrump(fd)) {
578 fd = fd_host2rump(fd);
579 op_write = rumpcalls[RUMPCALL_WRITE];
580 } else {
581 op_write = host_write;
582 }
583
584 return op_write(fd, buf, len);
585 }
586
587 ssize_t
588 writev(int fd, const struct iovec *iov, int iovcnt)
589 {
590 int (*op_writev)(int, const struct iovec *, int);
591
592 DPRINTF(("writev %d\n", fd));
593 if (fd_isrump(fd)) {
594 fd = fd_host2rump(fd);
595 op_writev = rumpcalls[RUMPCALL_WRITEV];
596 } else {
597 op_writev = host_writev;
598 }
599
600 return op_writev(fd, iov, iovcnt);
601 }
602
603 int
604 ioctl(int fd, unsigned long cmd, ...)
605 {
606 int (*op_ioctl)(int, unsigned long cmd, ...);
607 va_list ap;
608 int rv;
609
610 DPRINTF(("ioctl\n"));
611 if (fd_isrump(fd)) {
612 fd = fd_host2rump(fd);
613 op_ioctl = rumpcalls[RUMPCALL_IOCTL];
614 } else {
615 op_ioctl = host_ioctl;
616 }
617
618 va_start(ap, cmd);
619 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
620 va_end(ap);
621 return rv;
622 }
623
624 int
625 fcntl(int fd, int cmd, ...)
626 {
627 int (*op_fcntl)(int, int, ...);
628 va_list ap;
629 int rv;
630
631 DPRINTF(("fcntl\n"));
632 if (fd_isrump(fd)) {
633 fd = fd_host2rump(fd);
634 op_fcntl = rumpcalls[RUMPCALL_FCNTL];
635 } else {
636 op_fcntl = host_fcntl;
637 }
638
639 va_start(ap, cmd);
640 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
641 va_end(ap);
642 return rv;
643 }
644
645 int
646 close(int fd)
647 {
648 int (*op_close)(int);
649
650 DPRINTF(("close %d\n", fd));
651 if (fd_isrump(fd)) {
652 fd = fd_host2rump(fd);
653 op_close = rumpcalls[RUMPCALL_CLOSE];
654 } else {
655 op_close = host_close;
656 }
657
658 return op_close(fd);
659 }
660
/*
 * select() hijack, implemented on top of this file's pollts().
 *
 * The fd_sets are converted into a pollfd vector holding one entry per
 * descriptor that is present in at least one set:
 *	readfds   -> POLLIN
 *	writefds  -> POLLOUT
 *	exceptfds -> POLLHUP|POLLERR
 * and the revents are converted back into the sets afterwards.
 *
 * Returns the value returned by pollts(): -1 on error (sets left
 * untouched), 0 on timeout (all sets cleared) or the number of pollfd
 * entries with events.  A negative nfds fails with EINVAL.
 */
int
select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t realnfds = 0, j;
	int i, rv;

	DPRINTF(("select\n"));

	if (nfds < 0) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_open_fds.
	 * Seems to be fixed in current.
	 */
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill in the vector.  An entry is written only once we know
	 * the descriptor is in some set: touching pfds[j] before that
	 * would run past the end of the vector after the last entry,
	 * and would dereference NULL when no descriptor is set at all.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		short events = 0;

		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = pollts(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  This is done
	 * for the timeout case (rv == 0) too, since select() hands back
	 * empty sets when nothing is ready.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	free(pfds);
	return rv;
}
769
/*
 * Classify the descriptors of a poll vector: *rumpcall is incremented
 * for every entry fd_isrump() accepts and *hostcall for every other
 * entry.  The counters are not reset here; the caller initializes them.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx;
	int *counter;

	for (idx = 0; idx < nfds; idx++) {
		counter = fd_isrump(fds[idx].fd) ? rumpcall : hostcall;
		(*counter)++;
	}
}
782
/*
 * Rewrite the fd member of the first nfds entries of a poll vector in
 * place by running each through fdadj (in ascending order).
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{

	while (nfds-- > 0) {
		fds->fd = fdadj(fds->fd);
		fds++;
	}
}
792
/*
 * timespec variant with a 64-bit seconds field.  The only reference to
 * it in this file is the disabled (#if 0) compat kludge in pollts().
 */
struct mytimespec {
	uint64_t tv_sec;	/* seconds */
	long tv_nsec;		/* nanoseconds */
};
797
798 /*
799 * poll is easy as long as the call comes in the fds only in one
800 * kernel. otherwise its quite tricky...
801 */
802 struct pollarg {
803 struct pollfd *pfds;
804 nfds_t nfds;
805 const struct timespec *ts;
806 const sigset_t *sigmask;
807 int pipefd;
808 int errnum;
809 };
810
811 static void *
812 hostpoll(void *arg)
813 {
814 struct pollarg *parg = arg;
815 intptr_t rv;
816
817 rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
818 if (rv == -1)
819 parg->errnum = errno;
820 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
821
822 return (void *)(intptr_t)rv;
823 }
824
825 int
826 pollts(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
827 const sigset_t *sigmask)
828 {
829 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
830 const sigset_t *);
831 int hostcall = 0, rumpcall = 0;
832 pthread_t pt;
833 nfds_t i;
834 int rv;
835
836 #if 0
837 /* XXX: quick 5.0 kludge. do syscall compat in rumpclient properly */
838 struct mytimespec mts;
839 if (ts) {
840 mts.tv_sec = ts->tv_sec;
841 mts.tv_nsec = ts->tv_nsec;
842 ts = (struct timespec *)&mts;
843 }
844 #endif
845
846 DPRINTF(("poll\n"));
847 checkpoll(fds, nfds, &hostcall, &rumpcall);
848
849 if (hostcall && rumpcall) {
850 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
851 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
852 struct pollarg parg;
853 uintptr_t lrv;
854 int sverrno = 0, trv;
855
856 /*
857 * ok, this is where it gets tricky. We must support
858 * this since it's a very common operation in certain
859 * types of software (telnet, netcat, etc). We allocate
860 * two vectors and run two poll commands in separate
861 * threads. Whichever returns first "wins" and the
862 * other kernel's fds won't show activity.
863 */
864 rv = -1;
865
866 /* allocate full vector for O(n) joining after call */
867 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
868 if (!pfd_host)
869 goto out;
870 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
871 if (!pfd_rump) {
872 goto out;
873 }
874
875 /* split vectors */
876 for (i = 0; i < nfds; i++) {
877 if (fds[i].fd == -1) {
878 pfd_host[i].fd = -1;
879 pfd_rump[i].fd = -1;
880 } else if (fd_isrump(fds[i].fd)) {
881 pfd_host[i].fd = -1;
882 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
883 pfd_rump[i].events = fds[i].events;
884 } else {
885 pfd_rump[i].fd = -1;
886 pfd_host[i].fd = fds[i].fd;
887 pfd_host[i].events = fds[i].events;
888 }
889 }
890
891 /*
892 * then, open two pipes, one for notifications
893 * to each kernel.
894 */
895 if (rump_sys_pipe(rpipe) == -1)
896 goto out;
897 if (pipe(hpipe) == -1)
898 goto out;
899
900 pfd_host[nfds].fd = hpipe[0];
901 pfd_host[nfds].events = POLLIN;
902 pfd_rump[nfds].fd = rpipe[0];
903 pfd_rump[nfds].events = POLLIN;
904
905 /*
906 * then, create a thread to do host part and meanwhile
907 * do rump kernel part right here
908 */
909
910 parg.pfds = pfd_host;
911 parg.nfds = nfds+1;
912 parg.ts = ts;
913 parg.sigmask = sigmask;
914 parg.pipefd = rpipe[1];
915 pthread_create(&pt, NULL, hostpoll, &parg);
916
917 op_pollts = rumpcalls[RUMPCALL_POLLTS];
918 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
919 sverrno = errno;
920 write(hpipe[1], &rv, sizeof(rv));
921 pthread_join(pt, (void *)&trv);
922
923 /* check who "won" and merge results */
924 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
925 rv = trv;
926
927 for (i = 0; i < nfds; i++) {
928 if (pfd_rump[i].fd != -1)
929 fds[i].revents = pfd_rump[i].revents;
930 }
931 sverrno = parg.errnum;
932 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
933 rv = trv;
934
935 for (i = 0; i < nfds; i++) {
936 if (pfd_host[i].fd != -1)
937 fds[i].revents = pfd_host[i].revents;
938 }
939 } else {
940 rv = 0;
941 }
942
943 out:
944 if (rpipe[0] != -1)
945 rump_sys_close(rpipe[0]);
946 if (rpipe[1] != -1)
947 rump_sys_close(rpipe[1]);
948 if (hpipe[0] != -1)
949 close(hpipe[0]);
950 if (hpipe[1] != -1)
951 close(hpipe[1]);
952 free(pfd_host);
953 free(pfd_rump);
954 errno = sverrno;
955 } else {
956 if (hostcall) {
957 op_pollts = host_pollts;
958 } else {
959 op_pollts = rumpcalls[RUMPCALL_POLLTS];
960 adjustpoll(fds, nfds, fd_host2rump);
961 }
962
963 rv = op_pollts(fds, nfds, ts, sigmask);
964 if (rumpcall)
965 adjustpoll(fds, nfds, fd_rump2host);
966 }
967
968 return rv;
969 }
970
971 int
972 poll(struct pollfd *fds, nfds_t nfds, int timeout)
973 {
974 struct timespec ts;
975 struct timespec *tsp = NULL;
976
977 if (timeout != INFTIM) {
978 ts.tv_sec = timeout / 1000;
979 ts.tv_nsec = (timeout % 1000) * 1000;
980
981 tsp = &ts;
982 }
983
984 return pollts(fds, nfds, tsp, NULL);
985 }
986