hijack.c revision 1.8 1 /* $NetBSD: hijack.c,v 1.8 2011/01/17 16:27:54 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.8 2011/01/17 16:27:54 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/ioctl.h>
34 #include <sys/socket.h>
35 #include <sys/poll.h>
36
37 #include <rump/rumpclient.h>
38 #include <rump/rump_syscalls.h>
39
40 #include <assert.h>
41 #include <dlfcn.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <poll.h>
46 #include <pthread.h>
47 #include <signal.h>
48 #include <stdarg.h>
49 #include <stdbool.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <time.h>
53 #include <unistd.h>
54
/*
 * Indices into rumpcalls[].  rcinit() resolves rumpcalls[i] from
 * sysnames[i], so the order here must match the order of sysnames[].
 */
enum {
	RUMPCALL_SOCKET,
	RUMPCALL_ACCEPT,
	RUMPCALL_BIND,
	RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME,
	RUMPCALL_GETSOCKNAME,
	RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM,
	RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO,
	RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT,
	RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,
	RUMPCALL_READ,
	RUMPCALL_READV,
	RUMPCALL_WRITE,
	RUMPCALL_WRITEV,
	RUMPCALL_IOCTL,
	RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,
	RUMPCALL__NUM
};

/* Stringify after macro expansion of the argument. */
#define RSYS_STRING(a) __STRING(a)
/* String form of whatever RUMP_SYS_RENAME_<a> expands to. */
#define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))

/* Symbol names looked up in librumpclient, one per RUMPCALL_* index. */
const char *sysnames[] = {
	RSYS_NAME(SOCKET),
	RSYS_NAME(ACCEPT),
	RSYS_NAME(BIND),
	RSYS_NAME(CONNECT),
	RSYS_NAME(GETPEERNAME),
	RSYS_NAME(GETSOCKNAME),
	RSYS_NAME(LISTEN),
	RSYS_NAME(RECVFROM),
	RSYS_NAME(RECVMSG),
	RSYS_NAME(SENDTO),
	RSYS_NAME(SENDMSG),
	RSYS_NAME(GETSOCKOPT),
	RSYS_NAME(SETSOCKOPT),
	RSYS_NAME(SHUTDOWN),
	RSYS_NAME(READ),
	RSYS_NAME(READV),
	RSYS_NAME(WRITE),
	RSYS_NAME(WRITEV),
	RSYS_NAME(IOCTL),
	RSYS_NAME(FCNTL),
	RSYS_NAME(CLOSE),
	RSYS_NAME(POLLTS),
};
96
/*
 * Host (non-rump) implementations of the calls wrapped in this file.
 * rcinit() fills these in with dlsym(RTLD_NEXT, ...).
 */

/* socket calls */
static int	(*host_socket)(int, int, int);
static int	(*host_connect)(int, const struct sockaddr *, socklen_t);
static int	(*host_bind)(int, const struct sockaddr *, socklen_t);
static int	(*host_listen)(int, int);
static int	(*host_accept)(int, struct sockaddr *, socklen_t *);
static int	(*host_getpeername)(int, struct sockaddr *, socklen_t *);
static int	(*host_getsockname)(int, struct sockaddr *, socklen_t *);
static int	(*host_setsockopt)(int, int, int, const void *, socklen_t);

/* generic descriptor calls */
static ssize_t	(*host_read)(int, void *, size_t);
static ssize_t	(*host_readv)(int, const struct iovec *, int);
static ssize_t	(*host_write)(int, const void *, size_t);
static ssize_t	(*host_writev)(int, const struct iovec *, int);
static int	(*host_ioctl)(int, unsigned long, ...);
static int	(*host_fcntl)(int, int, ...);
static int	(*host_close)(int);
static int	(*host_pollts)(struct pollfd *, nfds_t,
		    const struct timespec *, const sigset_t *);

/* process / descriptor table calls */
static pid_t	(*host_fork)(void);
static int	(*host_dup2)(int, int);
117
118 static void *rumpcalls[RUMPCALL__NUM];
119
/*
 * dlsym() forwarder installed into librumpclient by rcinit() (through
 * the "rumpclient_dlsym" pointer) for the LD_PRELOAD case, so that the
 * lookup is issued from this library and RTLD_NEXT resolves correctly.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
130
/*
 * When true, __socket30() creates AF_LOCAL sockets with the host's
 * socket call instead of the rump kernel's.
 */
static bool hostlocalsockets = false;
133
134 static void __attribute__((constructor))
135 rcinit(void)
136 {
137 int (*rumpcinit)(void);
138 void **rumpcdlsym;
139 void *hand;
140 int i;
141
142 hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
143 if (!hand)
144 err(1, "cannot open librumpclient.so");
145 rumpcinit = dlsym(hand, "rumpclient_init");
146 _DIAGASSERT(rumpcinit);
147
148 rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
149 *rumpcdlsym = hijackdlsym;
150
151 host_socket = dlsym(RTLD_NEXT, "__socket30");
152 host_listen = dlsym(RTLD_NEXT, "listen");
153 host_connect = dlsym(RTLD_NEXT, "connect");
154 host_bind = dlsym(RTLD_NEXT, "bind");
155 host_accept = dlsym(RTLD_NEXT, "accept");
156 host_getpeername = dlsym(RTLD_NEXT, "getpeername");
157 host_getsockname = dlsym(RTLD_NEXT, "getsockname");
158 host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
159
160 host_read = dlsym(RTLD_NEXT, "read");
161 host_readv = dlsym(RTLD_NEXT, "readv");
162 host_write = dlsym(RTLD_NEXT, "write");
163 host_writev = dlsym(RTLD_NEXT, "writev");
164 host_ioctl = dlsym(RTLD_NEXT, "ioctl");
165 host_fcntl = dlsym(RTLD_NEXT, "fcntl");
166 host_close = dlsym(RTLD_NEXT, "close");
167 host_pollts = dlsym(RTLD_NEXT, "pollts");
168 host_fork = dlsym(RTLD_NEXT, "fork");
169 host_dup2 = dlsym(RTLD_NEXT, "dup2");
170
171 for (i = 0; i < RUMPCALL__NUM; i++) {
172 rumpcalls[i] = dlsym(hand, sysnames[i]);
173 if (!rumpcalls[i]) {
174 fprintf(stderr, "rumphijack: cannot find symbol: %s\n",
175 sysnames[i]);
176 exit(1);
177 }
178 }
179
180 if (rumpcinit() == -1)
181 err(1, "rumpclient init");
182 }
183
/*
 * dup2mask remembers descriptors that dup2() pointed at a rump kernel
 * fd: dup2() ORs (newd + 1) into it and ISDUP2D(fd) tests whether all
 * bits of (fd + 1) are present.
 *
 * NOTE(review): with this encoding, recording fd 2 (value 3) also makes
 * fds 0 and 1 match, and fd -1 (value 0) always matches.
 */
static unsigned dup2mask;
#define ISDUP2D(fd) ((((fd)+1) & dup2mask) == ((fd)+1))
186
/*
 * Debug tracing.  With DEBUGJACK defined, DPRINTF((fmt, ...)) prints to
 * stderr unless stderr has been dup2'd; otherwise it expands to nothing.
 */
//#define DEBUGJACK
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	/* stay quiet once stderr is marked as dup2'd */
	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#define DPRINTF(x) mydprintf x
#endif
206
/*
 * Offset added to rump kernel descriptors to form the numbers handed to
 * the application, plus the related thresholds.
 * XXX: should be selected at runtime; kept low because of FD_SETSIZE.
 */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */
211 static int
212 fd_rump2host(int fd)
213 {
214
215 if (fd == -1)
216 return fd;
217
218 if (!ISDUP2D(fd))
219 fd += HIJACK_FDOFF;
220
221 return fd;
222 }
223
224 static int
225 fd_host2rump(int fd)
226 {
227
228 if (!ISDUP2D(fd))
229 fd -= HIJACK_FDOFF;
230 return fd;
231 }
232
233 static bool
234 fd_isrump(int fd)
235 {
236
237 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
238 }
239
/* Assert that _fd_ is a rump descriptor (dup2'd or >= HIJACK_ASSERT). */
#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
/* the offset is private to the translation helpers above */
#undef HIJACK_FDOFF
242
/*
 * Socket call wrappers.  The ones guarded by assertfd() always call the
 * rump kernel; the others pick the host or the rump kernel based on
 * fd_isrump().
 */
246
247 int __socket30(int, int, int);
248 int
249 __socket30(int domain, int type, int protocol)
250 {
251 int (*rc_socket)(int, int, int);
252 int fd;
253 bool dohost;
254
255 dohost = hostlocalsockets && (domain == AF_LOCAL);
256
257 if (dohost)
258 rc_socket = host_socket;
259 else
260 rc_socket = rumpcalls[RUMPCALL_SOCKET];
261 fd = rc_socket(domain, type, protocol);
262
263 if (!dohost)
264 fd = fd_rump2host(fd);
265 DPRINTF(("socket <- %d\n", fd));
266
267 return fd;
268 }
269
270 int
271 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
272 {
273 int (*rc_accept)(int, struct sockaddr *, socklen_t *);
274 int fd;
275 bool isrump;
276
277 isrump = fd_isrump(s);
278
279 DPRINTF(("accept -> %d", s));
280 if (isrump) {
281 rc_accept = rumpcalls[RUMPCALL_ACCEPT];
282 s = fd_host2rump(s);
283 } else {
284 rc_accept = host_accept;
285 }
286 fd = rc_accept(s, addr, addrlen);
287 if (fd != -1 && isrump)
288 fd = fd_rump2host(fd);
289
290 DPRINTF((" <- %d\n", fd));
291
292 return fd;
293 }
294
295 int
296 bind(int s, const struct sockaddr *name, socklen_t namelen)
297 {
298 int (*rc_bind)(int, const struct sockaddr *, socklen_t);
299
300 DPRINTF(("bind -> %d\n", s));
301 if (fd_isrump(s)) {
302 rc_bind = rumpcalls[RUMPCALL_BIND];
303 s = fd_host2rump(s);
304 } else {
305 rc_bind = host_bind;
306 }
307 return rc_bind(s, name, namelen);
308 }
309
310 int
311 connect(int s, const struct sockaddr *name, socklen_t namelen)
312 {
313 int (*rc_connect)(int, const struct sockaddr *, socklen_t);
314
315 DPRINTF(("connect -> %d\n", s));
316 if (fd_isrump(s)) {
317 rc_connect = rumpcalls[RUMPCALL_CONNECT];
318 s = fd_host2rump(s);
319 } else {
320 rc_connect = host_connect;
321 }
322
323 return rc_connect(s, name, namelen);
324 }
325
326 int
327 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
328 {
329 int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
330
331 DPRINTF(("getpeername -> %d\n", s));
332 if (fd_isrump(s)) {
333 rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
334 s = fd_host2rump(s);
335 } else {
336 rc_getpeername = host_getpeername;
337 }
338 return rc_getpeername(s, name, namelen);
339 }
340
341 int
342 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
343 {
344 int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
345
346 DPRINTF(("getsockname -> %d\n", s));
347 if (fd_isrump(s)) {
348 rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
349 s = fd_host2rump(s);
350 } else {
351 rc_getsockname = host_getsockname;
352 }
353 return rc_getsockname(s, name, namelen);
354 }
355
356 int
357 listen(int s, int backlog)
358 {
359 int (*rc_listen)(int, int);
360
361 DPRINTF(("listen -> %d\n", s));
362 if (fd_isrump(s)) {
363 rc_listen = rumpcalls[RUMPCALL_LISTEN];
364 s = fd_host2rump(s);
365 } else {
366 rc_listen = host_listen;
367 }
368 return rc_listen(s, backlog);
369 }
370
/* recv() is recvfrom() without a source address. */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{
	ssize_t nrecv;

	nrecv = recvfrom(s, buf, len, flags, NULL, NULL);
	return nrecv;
}
377
378 ssize_t
379 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
380 socklen_t *fromlen)
381 {
382 int (*rc_recvfrom)(int, void *, size_t, int,
383 struct sockaddr *, socklen_t *);
384
385 DPRINTF(("recvfrom\n"));
386 assertfd(s);
387 rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
388 return rc_recvfrom(fd_host2rump(s), buf, len, flags, from, fromlen);
389 }
390
391 ssize_t
392 recvmsg(int s, struct msghdr *msg, int flags)
393 {
394 int (*rc_recvmsg)(int, struct msghdr *, int);
395
396 DPRINTF(("recvmsg\n"));
397 assertfd(s);
398 rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
399 return rc_recvmsg(fd_host2rump(s), msg, flags);
400 }
401
/* send() is sendto() without a destination address. */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{
	ssize_t nsent;

	nsent = sendto(s, buf, len, flags, NULL, 0);
	return nsent;
}
408
409 ssize_t
410 sendto(int s, const void *buf, size_t len, int flags,
411 const struct sockaddr *to, socklen_t tolen)
412 {
413 int (*rc_sendto)(int, const void *, size_t, int,
414 const struct sockaddr *, socklen_t);
415
416 if (s == -1)
417 return len;
418
419 DPRINTF(("sendto\n"));
420 assertfd(s);
421 rc_sendto = rumpcalls[RUMPCALL_SENDTO];
422 return rc_sendto(fd_host2rump(s), buf, len, flags, to, tolen);
423 }
424
425 ssize_t
426 sendmsg(int s, const struct msghdr *msg, int flags)
427 {
428 int (*rc_sendmsg)(int, const struct msghdr *, int);
429
430 DPRINTF(("sendmsg\n"));
431 assertfd(s);
432 rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
433 return rc_sendmsg(fd_host2rump(s), msg, flags);
434 }
435
436 int
437 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
438 {
439 int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
440
441 DPRINTF(("getsockopt -> %d\n", s));
442 assertfd(s);
443 rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
444 return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
445 }
446
447 int
448 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
449 {
450 int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
451
452 DPRINTF(("setsockopt -> %d\n", s));
453 if (fd_isrump(s)) {
454 rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
455 s = fd_host2rump(s);
456 } else {
457 rc_setsockopt = host_setsockopt;
458 }
459 return rc_setsockopt(s, level, optname, optval, optlen);
460 }
461
462 int
463 shutdown(int s, int how)
464 {
465 int (*rc_shutdown)(int, int);
466
467 DPRINTF(("shutdown -> %d\n", s));
468 assertfd(s);
469 rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
470 return rc_shutdown(fd_host2rump(s), how);
471 }
472
473 /*
474 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
475 * many programs do that. dup2 of a rump kernel fd to another value
476 * not >= fdoff is an error.
477 *
478 * Note: cannot rump2host newd, because it is often hardcoded.
479 *
480 * XXX: should disable debug prints after stdout/stderr are dup2'd
481 */
482 int
483 dup2(int oldd, int newd)
484 {
485 int rv;
486
487 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
488
489 if (fd_isrump(oldd)) {
490 if (!(newd >= 0 && newd <= 2))
491 return EBADF;
492 oldd = fd_host2rump(oldd);
493 rv = rump_sys_dup2(oldd, newd);
494 if (rv != -1)
495 dup2mask |= newd+1;
496 return rv;
497 } else {
498 return host_dup2(oldd, newd);
499 }
500 }
501
/*
 * fork() wrapper.  The host fork is bracketed by the rump client calls
 * rumpclient_prefork() (before, in the parent) and
 * rumpclient_fork_init() (after, in the child), to preserve the file
 * descriptors of the forked parent in the child while preventing
 * double use of the connection fd.
 *
 * Returns what the host fork returned, or -1 if rumpclient_prefork()
 * fails or if rumpclient_fork_init() fails in the child.
 */
pid_t
fork(void)
{
	struct rumpclient_fork *rf;
	pid_t rv;

	DPRINTF(("fork\n"));

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	rv = host_fork();
	if (rv == 0) {
		/* child */
		if (rumpclient_fork_init(rf) == -1)
			rv = -1;
	}
	/* XXX: rf is not cancelled when host_fork() fails */

	DPRINTF(("fork returns %d\n", rv));
	return rv;
}
534
535 /*
536 * Hybrids
537 */
538
539 ssize_t
540 read(int fd, void *buf, size_t len)
541 {
542 int (*op_read)(int, void *, size_t);
543 ssize_t n;
544
545 DPRINTF(("read %d\n", fd));
546 if (fd_isrump(fd)) {
547 fd = fd_host2rump(fd);
548 op_read = rumpcalls[RUMPCALL_READ];
549 } else {
550 op_read = host_read;
551 }
552
553 n = op_read(fd, buf, len);
554 return n;
555 }
556
557 ssize_t
558 readv(int fd, const struct iovec *iov, int iovcnt)
559 {
560 int (*op_readv)(int, const struct iovec *, int);
561
562 DPRINTF(("readv %d\n", fd));
563 if (fd_isrump(fd)) {
564 fd = fd_host2rump(fd);
565 op_readv = rumpcalls[RUMPCALL_READV];
566 } else {
567 op_readv = host_readv;
568 }
569
570 return op_readv(fd, iov, iovcnt);
571 }
572
573 ssize_t
574 write(int fd, const void *buf, size_t len)
575 {
576 int (*op_write)(int, const void *, size_t);
577
578 if (fd_isrump(fd)) {
579 fd = fd_host2rump(fd);
580 op_write = rumpcalls[RUMPCALL_WRITE];
581 } else {
582 op_write = host_write;
583 }
584
585 return op_write(fd, buf, len);
586 }
587
588 ssize_t
589 writev(int fd, const struct iovec *iov, int iovcnt)
590 {
591 int (*op_writev)(int, const struct iovec *, int);
592
593 DPRINTF(("writev %d\n", fd));
594 if (fd_isrump(fd)) {
595 fd = fd_host2rump(fd);
596 op_writev = rumpcalls[RUMPCALL_WRITEV];
597 } else {
598 op_writev = host_writev;
599 }
600
601 return op_writev(fd, iov, iovcnt);
602 }
603
604 int
605 ioctl(int fd, unsigned long cmd, ...)
606 {
607 int (*op_ioctl)(int, unsigned long cmd, ...);
608 va_list ap;
609 int rv;
610
611 DPRINTF(("ioctl\n"));
612 if (fd_isrump(fd)) {
613 fd = fd_host2rump(fd);
614 op_ioctl = rumpcalls[RUMPCALL_IOCTL];
615 } else {
616 op_ioctl = host_ioctl;
617 }
618
619 va_start(ap, cmd);
620 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
621 va_end(ap);
622 return rv;
623 }
624
625 int
626 fcntl(int fd, int cmd, ...)
627 {
628 int (*op_fcntl)(int, int, ...);
629 va_list ap;
630 int rv;
631
632 DPRINTF(("fcntl\n"));
633 if (fd_isrump(fd)) {
634 fd = fd_host2rump(fd);
635 op_fcntl = rumpcalls[RUMPCALL_FCNTL];
636 } else {
637 op_fcntl = host_fcntl;
638 }
639
640 va_start(ap, cmd);
641 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
642 va_end(ap);
643 return rv;
644 }
645
646 int
647 close(int fd)
648 {
649 int (*op_close)(int);
650
651 DPRINTF(("close %d\n", fd));
652 if (fd_isrump(fd)) {
653 fd = fd_host2rump(fd);
654 op_close = rumpcalls[RUMPCALL_CLOSE];
655 } else {
656 op_close = host_close;
657 }
658
659 return op_close(fd);
660 }
661
/*
 * select() implemented on top of this file's pollts().
 *
 * The descriptor sets are converted into a pollfd vector with one entry
 * per descriptor that appears in any set (read -> POLLIN, write ->
 * POLLOUT, except -> POLLHUP|POLLERR), pollts() is called, and the
 * results are converted back into the sets.
 *
 * Returns the value of pollts(); -1 with errno set to EINVAL for a
 * negative nfds; -1 if the vector cannot be allocated.  On error the
 * sets are left untouched; on timeout (0) they are cleared.
 */
int
select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t j, realnfds = 0;
	int i, rv;

	DPRINTF(("select\n"));

	if (nfds < 0) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_open_fds.
	 */
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill the vector.  An entry is only touched once we know the
	 * descriptor is in some set, so we never write past the
	 * realnfds entries allocated above (or through a NULL vector).
	 */
	for (i = 0, j = 0; i < nfds && j < realnfds; i++) {
		short events = 0;

		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = pollts(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  This is done
	 * for a timeout as well, so that no descriptor is reported
	 * ready when nothing happened.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	free(pfds);
	return rv;
}
770
/*
 * Classify the descriptors of a poll vector: bump *rumpcall for every
 * entry fd_isrump() accepts and *hostcall for every other entry.  The
 * counters are only incremented, never reset, here.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx;

	for (idx = 0; idx < nfds; idx++) {
		int *counter = fd_isrump(fds[idx].fd) ? rumpcall : hostcall;

		(*counter)++;
	}
}
783
/*
 * Rewrite the fd member of each of the nfds entries in place with the
 * value returned by fdadj() for it.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	nfds_t left;

	for (left = nfds; left > 0; left--, fds++)
		fds->fd = fdadj(fds->fd);
}
793
/*
 * poll is easy as long as all the fds of a call live in one kernel.
 * Otherwise it is quite tricky: the host half runs in a helper thread
 * (hostpoll()), which receives its arguments through this structure.
 */
struct pollarg {
	struct pollfd *pfds;		/* vector for host_pollts() */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout for host_pollts() */
	const sigset_t *sigmask;	/* signal mask for host_pollts() */
	int pipefd;			/* rump fd written when host poll ends */
	int errnum;			/* errno of a failed host_pollts() */
};
806
807 static void *
808 hostpoll(void *arg)
809 {
810 struct pollarg *parg = arg;
811 intptr_t rv;
812
813 rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
814 if (rv == -1)
815 parg->errnum = errno;
816 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
817
818 return (void *)(intptr_t)rv;
819 }
820
821 int
822 pollts(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
823 const sigset_t *sigmask)
824 {
825 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
826 const sigset_t *);
827 int hostcall = 0, rumpcall = 0;
828 pthread_t pt;
829 nfds_t i;
830 int rv;
831
832 DPRINTF(("poll\n"));
833 checkpoll(fds, nfds, &hostcall, &rumpcall);
834
835 if (hostcall && rumpcall) {
836 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
837 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
838 struct pollarg parg;
839 uintptr_t lrv;
840 int sverrno = 0, trv;
841
842 /*
843 * ok, this is where it gets tricky. We must support
844 * this since it's a very common operation in certain
845 * types of software (telnet, netcat, etc). We allocate
846 * two vectors and run two poll commands in separate
847 * threads. Whichever returns first "wins" and the
848 * other kernel's fds won't show activity.
849 */
850 rv = -1;
851
852 /* allocate full vector for O(n) joining after call */
853 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
854 if (!pfd_host)
855 goto out;
856 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
857 if (!pfd_rump) {
858 goto out;
859 }
860
861 /* split vectors */
862 for (i = 0; i < nfds; i++) {
863 if (fds[i].fd == -1) {
864 pfd_host[i].fd = -1;
865 pfd_rump[i].fd = -1;
866 } else if (fd_isrump(fds[i].fd)) {
867 pfd_host[i].fd = -1;
868 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
869 pfd_rump[i].events = fds[i].events;
870 } else {
871 pfd_rump[i].fd = -1;
872 pfd_host[i].fd = fds[i].fd;
873 pfd_host[i].events = fds[i].events;
874 }
875 }
876
877 /*
878 * then, open two pipes, one for notifications
879 * to each kernel.
880 */
881 if (rump_sys_pipe(rpipe) == -1)
882 goto out;
883 if (pipe(hpipe) == -1)
884 goto out;
885
886 pfd_host[nfds].fd = hpipe[0];
887 pfd_host[nfds].events = POLLIN;
888 pfd_rump[nfds].fd = rpipe[0];
889 pfd_rump[nfds].events = POLLIN;
890
891 /*
892 * then, create a thread to do host part and meanwhile
893 * do rump kernel part right here
894 */
895
896 parg.pfds = pfd_host;
897 parg.nfds = nfds+1;
898 parg.ts = ts;
899 parg.sigmask = sigmask;
900 parg.pipefd = rpipe[1];
901 pthread_create(&pt, NULL, hostpoll, &parg);
902
903 op_pollts = rumpcalls[RUMPCALL_POLLTS];
904 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
905 sverrno = errno;
906 write(hpipe[1], &rv, sizeof(rv));
907 pthread_join(pt, (void *)&trv);
908
909 /* check who "won" and merge results */
910 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
911 rv = trv;
912
913 for (i = 0; i < nfds; i++) {
914 if (pfd_rump[i].fd != -1)
915 fds[i].revents = pfd_rump[i].revents;
916 }
917 sverrno = parg.errnum;
918 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
919 rv = trv;
920
921 for (i = 0; i < nfds; i++) {
922 if (pfd_host[i].fd != -1)
923 fds[i].revents = pfd_host[i].revents;
924 }
925 } else {
926 rv = 0;
927 }
928
929 out:
930 if (rpipe[0] != -1)
931 rump_sys_close(rpipe[0]);
932 if (rpipe[1] != -1)
933 rump_sys_close(rpipe[1]);
934 if (hpipe[0] != -1)
935 close(hpipe[0]);
936 if (hpipe[1] != -1)
937 close(hpipe[1]);
938 free(pfd_host);
939 free(pfd_rump);
940 errno = sverrno;
941 } else {
942 if (hostcall) {
943 op_pollts = host_pollts;
944 } else {
945 op_pollts = rumpcalls[RUMPCALL_POLLTS];
946 adjustpoll(fds, nfds, fd_host2rump);
947 }
948
949 rv = op_pollts(fds, nfds, ts, sigmask);
950 if (rumpcall)
951 adjustpoll(fds, nfds, fd_rump2host);
952 }
953
954 return rv;
955 }
956
957 int
958 poll(struct pollfd *fds, nfds_t nfds, int timeout)
959 {
960 struct timespec ts;
961 struct timespec *tsp = NULL;
962
963 if (timeout != INFTIM) {
964 ts.tv_sec = timeout / 1000;
965 ts.tv_nsec = (timeout % 1000) * 1000;
966
967 tsp = &ts;
968 }
969
970 return pollts(fds, nfds, tsp, NULL);
971 }
972