hijack.c revision 1.10 1 /* $NetBSD: hijack.c,v 1.10 2011/01/18 11:04:10 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.10 2011/01/18 11:04:10 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/event.h>
34 #include <sys/ioctl.h>
35 #include <sys/socket.h>
36 #include <sys/poll.h>
37
38 #include <rump/rumpclient.h>
39 #include <rump/rump_syscalls.h>
40
41 #include <assert.h>
42 #include <dlfcn.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <poll.h>
47 #include <pthread.h>
48 #include <signal.h>
49 #include <stdarg.h>
50 #include <stdbool.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <time.h>
54 #include <unistd.h>
55
/*
 * Indices of the rump kernel system calls this library forwards to.
 * The same values index both sysnames[] and rumpcalls[], so the order
 * here must match the order of sysnames[].  RUMPCALL__NUM is the number
 * of entries and has to stay last.
 */
enum {
	RUMPCALL_SOCKET,
	RUMPCALL_ACCEPT,
	RUMPCALL_BIND,
	RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME,
	RUMPCALL_GETSOCKNAME,
	RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM,
	RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO,
	RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT,
	RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,
	RUMPCALL_READ,
	RUMPCALL_READV,
	RUMPCALL_WRITE,
	RUMPCALL_WRITEV,
	RUMPCALL_IOCTL,
	RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,
	RUMPCALL__NUM
};
69
70 #define RSYS_STRING(a) __STRING(a)
71 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
72
73 const char *sysnames[] = {
74 RSYS_NAME(SOCKET),
75 RSYS_NAME(ACCEPT),
76 RSYS_NAME(BIND),
77 RSYS_NAME(CONNECT),
78 RSYS_NAME(GETPEERNAME),
79 RSYS_NAME(GETSOCKNAME),
80 RSYS_NAME(LISTEN),
81 RSYS_NAME(RECVFROM),
82 RSYS_NAME(RECVMSG),
83 RSYS_NAME(SENDTO),
84 RSYS_NAME(SENDMSG),
85 RSYS_NAME(GETSOCKOPT),
86 RSYS_NAME(SETSOCKOPT),
87 RSYS_NAME(SHUTDOWN),
88 RSYS_NAME(READ),
89 RSYS_NAME(READV),
90 RSYS_NAME(WRITE),
91 RSYS_NAME(WRITEV),
92 RSYS_NAME(IOCTL),
93 RSYS_NAME(FCNTL),
94 RSYS_NAME(CLOSE),
95 RSYS_NAME(POLLTS),
96 };
97
98 static int (*host_socket)(int, int, int);
99 static int (*host_connect)(int, const struct sockaddr *, socklen_t);
100 static int (*host_bind)(int, const struct sockaddr *, socklen_t);
101 static int (*host_listen)(int, int);
102 static int (*host_accept)(int, struct sockaddr *, socklen_t *);
103 static int (*host_getpeername)(int, struct sockaddr *, socklen_t *);
104 static int (*host_getsockname)(int, struct sockaddr *, socklen_t *);
105 static int (*host_setsockopt)(int, int, int, const void *, socklen_t);
106
107 static ssize_t (*host_read)(int, void *, size_t);
108 static ssize_t (*host_readv)(int, const struct iovec *, int);
109 static ssize_t (*host_write)(int, const void *, size_t);
110 static ssize_t (*host_writev)(int, const struct iovec *, int);
111 static int (*host_ioctl)(int, unsigned long, ...);
112 static int (*host_fcntl)(int, int, ...);
113 static int (*host_close)(int);
114 static int (*host_pollts)(struct pollfd *, nfds_t,
115 const struct timespec *, const sigset_t *);
116 static pid_t (*host_fork)(void);
117 static int (*host_dup2)(int, int);
118
119 static void *rumpcalls[RUMPCALL__NUM];
120
/*
 * dlsym() trampoline handed to librumpclient by rcinit() (stored through
 * its "rumpclient_dlsym" pointer).  It is used in the LD_PRELOAD case so
 * that the lookup is issued from this library, which makes RTLD_NEXT
 * resolve correctly.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
131
/*
 * If set, AF_LOCAL sockets are created in the host kernel instead of the
 * rump kernel (see __socket30()).  Nothing in the visible part of this
 * file ever sets it, so it stays false.
 */
static bool hostlocalsockets;
134
135 static void __attribute__((constructor))
136 rcinit(void)
137 {
138 int (*rumpcinit)(void);
139 void **rumpcdlsym;
140 void *hand;
141 int i;
142
143 hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
144 if (!hand)
145 err(1, "cannot open librumpclient.so");
146 rumpcinit = dlsym(hand, "rumpclient_init");
147 _DIAGASSERT(rumpcinit);
148
149 rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
150 *rumpcdlsym = hijackdlsym;
151
152 host_socket = dlsym(RTLD_NEXT, "__socket30");
153 host_listen = dlsym(RTLD_NEXT, "listen");
154 host_connect = dlsym(RTLD_NEXT, "connect");
155 host_bind = dlsym(RTLD_NEXT, "bind");
156 host_accept = dlsym(RTLD_NEXT, "accept");
157 host_getpeername = dlsym(RTLD_NEXT, "getpeername");
158 host_getsockname = dlsym(RTLD_NEXT, "getsockname");
159 host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
160
161 host_read = dlsym(RTLD_NEXT, "read");
162 host_readv = dlsym(RTLD_NEXT, "readv");
163 host_write = dlsym(RTLD_NEXT, "write");
164 host_writev = dlsym(RTLD_NEXT, "writev");
165 host_ioctl = dlsym(RTLD_NEXT, "ioctl");
166 host_fcntl = dlsym(RTLD_NEXT, "fcntl");
167 host_close = dlsym(RTLD_NEXT, "close");
168 host_pollts = dlsym(RTLD_NEXT, "pollts");
169 host_fork = dlsym(RTLD_NEXT, "fork");
170 host_dup2 = dlsym(RTLD_NEXT, "dup2");
171
172 for (i = 0; i < RUMPCALL__NUM; i++) {
173 rumpcalls[i] = dlsym(hand, sysnames[i]);
174 if (!rumpcalls[i]) {
175 fprintf(stderr, "rumphijack: cannot find symbol: %s\n",
176 sysnames[i]);
177 exit(1);
178 }
179 }
180
181 if (rumpcinit() == -1)
182 err(1, "rumpclient init");
183 }
184
/*
 * Bitmask of host descriptor numbers onto which a rump kernel descriptor
 * has been dup2()'d; bit N set means descriptor N lives in the rump
 * kernel under the same number.
 */
static unsigned dup2mask;

/*
 * True if fd is recorded in dup2mask.  Descriptors that do not fit in
 * the mask (negative, or >= the number of bits in dup2mask) are never
 * dup2'd; they are rejected before shifting, because shifting by a
 * negative or too large count is undefined behaviour.
 * Note: evaluates its argument more than once.
 */
#define ISDUP2D(fd)							\
	((unsigned)(fd) < sizeof(dup2mask) * 8 &&			\
	    ((1U << (fd)) & dup2mask) != 0)
187
/*
 * Debug tracing.  Define DEBUGJACK to have DPRINTF(("fmt", args...))
 * print to stderr; otherwise DPRINTF() expands to nothing and its
 * arguments are not evaluated.
 */
/* #define DEBUGJACK */
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	/* stderr was dup2'd from a rump kernel fd: do not print */
	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#endif
207
/*
 * Rump kernel descriptors are presented to the application shifted up by
 * HIJACK_FDOFF so that they can be told apart from host descriptors.
 *
 * XXX: need runtime selection. low for now due to FD_SETSIZE
 */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */

/*
 * Translate a descriptor number returned by the rump kernel into the
 * number handed to the application.  The error value -1 and descriptors
 * recorded as dup2'd are returned unchanged.
 */
static int
fd_rump2host(int fd)
{

	if (fd != -1 && !ISDUP2D(fd))
		return fd + HIJACK_FDOFF;

	return fd;
}
224
225 static int
226 fd_host2rump(int fd)
227 {
228
229 if (!ISDUP2D(fd))
230 fd -= HIJACK_FDOFF;
231 return fd;
232 }
233
234 static bool
235 fd_isrump(int fd)
236 {
237
238 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
239 }
240
/*
 * Assert that _fd_ is a rump kernel descriptor.  Used by the wrappers
 * that have no host fallback.  Note: evaluates its argument twice.
 */
#define assertfd(_fd_) assert((_fd_) >= HIJACK_ASSERT || ISDUP2D(_fd_))
/* the raw offset must not be used past this point; use the helpers */
#undef HIJACK_FDOFF
243
244 int __socket30(int, int, int);
245 int
246 __socket30(int domain, int type, int protocol)
247 {
248 int (*rc_socket)(int, int, int);
249 int fd;
250 bool dohost;
251
252 dohost = hostlocalsockets && (domain == AF_LOCAL);
253
254 if (dohost)
255 rc_socket = host_socket;
256 else
257 rc_socket = rumpcalls[RUMPCALL_SOCKET];
258 fd = rc_socket(domain, type, protocol);
259
260 if (!dohost)
261 fd = fd_rump2host(fd);
262 DPRINTF(("socket <- %d\n", fd));
263
264 return fd;
265 }
266
267 int
268 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
269 {
270 int (*rc_accept)(int, struct sockaddr *, socklen_t *);
271 int fd;
272 bool isrump;
273
274 isrump = fd_isrump(s);
275
276 DPRINTF(("accept -> %d", s));
277 if (isrump) {
278 rc_accept = rumpcalls[RUMPCALL_ACCEPT];
279 s = fd_host2rump(s);
280 } else {
281 rc_accept = host_accept;
282 }
283 fd = rc_accept(s, addr, addrlen);
284 if (fd != -1 && isrump)
285 fd = fd_rump2host(fd);
286
287 DPRINTF((" <- %d\n", fd));
288
289 return fd;
290 }
291
292 int
293 bind(int s, const struct sockaddr *name, socklen_t namelen)
294 {
295 int (*rc_bind)(int, const struct sockaddr *, socklen_t);
296
297 DPRINTF(("bind -> %d\n", s));
298 if (fd_isrump(s)) {
299 rc_bind = rumpcalls[RUMPCALL_BIND];
300 s = fd_host2rump(s);
301 } else {
302 rc_bind = host_bind;
303 }
304 return rc_bind(s, name, namelen);
305 }
306
307 int
308 connect(int s, const struct sockaddr *name, socklen_t namelen)
309 {
310 int (*rc_connect)(int, const struct sockaddr *, socklen_t);
311
312 DPRINTF(("connect -> %d\n", s));
313 if (fd_isrump(s)) {
314 rc_connect = rumpcalls[RUMPCALL_CONNECT];
315 s = fd_host2rump(s);
316 } else {
317 rc_connect = host_connect;
318 }
319
320 return rc_connect(s, name, namelen);
321 }
322
323 int
324 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
325 {
326 int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
327
328 DPRINTF(("getpeername -> %d\n", s));
329 if (fd_isrump(s)) {
330 rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
331 s = fd_host2rump(s);
332 } else {
333 rc_getpeername = host_getpeername;
334 }
335 return rc_getpeername(s, name, namelen);
336 }
337
338 int
339 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
340 {
341 int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
342
343 DPRINTF(("getsockname -> %d\n", s));
344 if (fd_isrump(s)) {
345 rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
346 s = fd_host2rump(s);
347 } else {
348 rc_getsockname = host_getsockname;
349 }
350 return rc_getsockname(s, name, namelen);
351 }
352
353 int
354 listen(int s, int backlog)
355 {
356 int (*rc_listen)(int, int);
357
358 DPRINTF(("listen -> %d\n", s));
359 if (fd_isrump(s)) {
360 rc_listen = rumpcalls[RUMPCALL_LISTEN];
361 s = fd_host2rump(s);
362 } else {
363 rc_listen = host_listen;
364 }
365 return rc_listen(s, backlog);
366 }
367
/*
 * recv(2) replacement, implemented as recvfrom() without a source
 * address buffer.
 */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{
	ssize_t nrecv;

	nrecv = recvfrom(s, buf, len, flags, NULL, NULL);
	return nrecv;
}
374
375 ssize_t
376 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
377 socklen_t *fromlen)
378 {
379 int (*rc_recvfrom)(int, void *, size_t, int,
380 struct sockaddr *, socklen_t *);
381
382 DPRINTF(("recvfrom\n"));
383 assertfd(s);
384 rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
385 return rc_recvfrom(fd_host2rump(s), buf, len, flags, from, fromlen);
386 }
387
388 ssize_t
389 recvmsg(int s, struct msghdr *msg, int flags)
390 {
391 int (*rc_recvmsg)(int, struct msghdr *, int);
392
393 DPRINTF(("recvmsg\n"));
394 assertfd(s);
395 rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
396 return rc_recvmsg(fd_host2rump(s), msg, flags);
397 }
398
/*
 * send(2) replacement, implemented as sendto() without a destination
 * address.
 */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{
	ssize_t nsent;

	nsent = sendto(s, buf, len, flags, NULL, 0);
	return nsent;
}
405
406 ssize_t
407 sendto(int s, const void *buf, size_t len, int flags,
408 const struct sockaddr *to, socklen_t tolen)
409 {
410 int (*rc_sendto)(int, const void *, size_t, int,
411 const struct sockaddr *, socklen_t);
412
413 if (s == -1)
414 return len;
415
416 DPRINTF(("sendto\n"));
417 assertfd(s);
418 rc_sendto = rumpcalls[RUMPCALL_SENDTO];
419 return rc_sendto(fd_host2rump(s), buf, len, flags, to, tolen);
420 }
421
422 ssize_t
423 sendmsg(int s, const struct msghdr *msg, int flags)
424 {
425 int (*rc_sendmsg)(int, const struct msghdr *, int);
426
427 DPRINTF(("sendmsg\n"));
428 assertfd(s);
429 rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
430 return rc_sendmsg(fd_host2rump(s), msg, flags);
431 }
432
433 int
434 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
435 {
436 int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
437
438 DPRINTF(("getsockopt -> %d\n", s));
439 assertfd(s);
440 rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
441 return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
442 }
443
444 int
445 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
446 {
447 int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
448
449 DPRINTF(("setsockopt -> %d\n", s));
450 if (fd_isrump(s)) {
451 rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
452 s = fd_host2rump(s);
453 } else {
454 rc_setsockopt = host_setsockopt;
455 }
456 return rc_setsockopt(s, level, optname, optval, optlen);
457 }
458
459 int
460 shutdown(int s, int how)
461 {
462 int (*rc_shutdown)(int, int);
463
464 DPRINTF(("shutdown -> %d\n", s));
465 assertfd(s);
466 rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
467 return rc_shutdown(fd_host2rump(s), how);
468 }
469
470 /*
471 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
472 * many programs do that. dup2 of a rump kernel fd to another value
473 * not >= fdoff is an error.
474 *
475 * Note: cannot rump2host newd, because it is often hardcoded.
476 *
477 * XXX: should disable debug prints after stdout/stderr are dup2'd
478 */
479 int
480 dup2(int oldd, int newd)
481 {
482 int rv;
483
484 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
485
486 if (fd_isrump(oldd)) {
487 if (!(newd >= 0 && newd <= 2))
488 return EBADF;
489 oldd = fd_host2rump(oldd);
490 rv = rump_sys_dup2(oldd, newd);
491 if (rv != -1)
492 dup2mask |= 1<<newd;
493 } else {
494 rv = host_dup2(oldd, newd);
495 }
496
497 return rv;
498 }
499
/*
 * fork(2) replacement.  The host fork is wrapped in the rump client fork
 * calls to preserve the file descriptors of the forking parent in the
 * child, but prevent double use of the connection fd.
 *
 * Returns what the host fork returns, except that -1 is returned without
 * forking if rumpclient_prefork() fails, and -1 is returned in the child
 * if rumpclient_fork_init() fails there.
 */

pid_t
fork(void)
{
	struct rumpclient_fork *rf;
	pid_t rv;

	DPRINTF(("fork\n"));

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	rv = host_fork();
	if (rv == 0) {
		/* child: set up the rump client state from the prefork data */
		if (rumpclient_fork_init(rf) == -1)
			rv = -1;
	}
	/* XXX: rf is not cancelled when host_fork() fails (rv == -1) */

	DPRINTF(("fork returns %d\n", rv));
	return rv;
}
532
533 /*
534 * Hybrids
535 */
536
537 ssize_t
538 read(int fd, void *buf, size_t len)
539 {
540 int (*op_read)(int, void *, size_t);
541 ssize_t n;
542
543 DPRINTF(("read %d\n", fd));
544 if (fd_isrump(fd)) {
545 fd = fd_host2rump(fd);
546 op_read = rumpcalls[RUMPCALL_READ];
547 } else {
548 op_read = host_read;
549 }
550
551 n = op_read(fd, buf, len);
552 return n;
553 }
554
555 ssize_t
556 readv(int fd, const struct iovec *iov, int iovcnt)
557 {
558 int (*op_readv)(int, const struct iovec *, int);
559
560 DPRINTF(("readv %d\n", fd));
561 if (fd_isrump(fd)) {
562 fd = fd_host2rump(fd);
563 op_readv = rumpcalls[RUMPCALL_READV];
564 } else {
565 op_readv = host_readv;
566 }
567
568 return op_readv(fd, iov, iovcnt);
569 }
570
571 ssize_t
572 write(int fd, const void *buf, size_t len)
573 {
574 int (*op_write)(int, const void *, size_t);
575
576 if (fd_isrump(fd)) {
577 fd = fd_host2rump(fd);
578 op_write = rumpcalls[RUMPCALL_WRITE];
579 } else {
580 op_write = host_write;
581 }
582
583 return op_write(fd, buf, len);
584 }
585
586 ssize_t
587 writev(int fd, const struct iovec *iov, int iovcnt)
588 {
589 int (*op_writev)(int, const struct iovec *, int);
590
591 DPRINTF(("writev %d\n", fd));
592 if (fd_isrump(fd)) {
593 fd = fd_host2rump(fd);
594 op_writev = rumpcalls[RUMPCALL_WRITEV];
595 } else {
596 op_writev = host_writev;
597 }
598
599 return op_writev(fd, iov, iovcnt);
600 }
601
602 int
603 ioctl(int fd, unsigned long cmd, ...)
604 {
605 int (*op_ioctl)(int, unsigned long cmd, ...);
606 va_list ap;
607 int rv;
608
609 DPRINTF(("ioctl\n"));
610 if (fd_isrump(fd)) {
611 fd = fd_host2rump(fd);
612 op_ioctl = rumpcalls[RUMPCALL_IOCTL];
613 } else {
614 op_ioctl = host_ioctl;
615 }
616
617 va_start(ap, cmd);
618 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
619 va_end(ap);
620 return rv;
621 }
622
623 int
624 fcntl(int fd, int cmd, ...)
625 {
626 int (*op_fcntl)(int, int, ...);
627 va_list ap;
628 int rv;
629
630 DPRINTF(("fcntl\n"));
631 if (fd_isrump(fd)) {
632 fd = fd_host2rump(fd);
633 op_fcntl = rumpcalls[RUMPCALL_FCNTL];
634 } else {
635 op_fcntl = host_fcntl;
636 }
637
638 va_start(ap, cmd);
639 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
640 va_end(ap);
641 return rv;
642 }
643
644 int
645 close(int fd)
646 {
647 int (*op_close)(int);
648
649 DPRINTF(("close %d\n", fd));
650 if (fd_isrump(fd)) {
651 fd = fd_host2rump(fd);
652 op_close = rumpcalls[RUMPCALL_CLOSE];
653 } else {
654 op_close = host_close;
655 }
656
657 return op_close(fd);
658 }
659
/*
 * select(2) replacement, implemented on top of pollts() so that host and
 * rump kernel descriptors can be mixed.
 *
 * The descriptor sets are converted into a pollfd vector (read ->
 * POLLIN, write -> POLLOUT, except -> POLLHUP|POLLERR), pollts() is
 * called, and the results are converted back into the sets.
 *
 * Returns the value returned by pollts(), or -1 if nfds is negative
 * (errno = EINVAL) or memory cannot be allocated.  On error the sets are
 * left unmodified; on timeout (0) they are cleared.
 */
int
select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t realnfds = 0, j;
	short events;
	int i, rv;

	DPRINTF(("select\n"));

	if (nfds < 0) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_open_fds.
	 * Seems to be fixed in current, thank the maker.
	 */
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill in one entry per descriptor of interest.  An entry is only
	 * touched once we know the descriptor is in some set: with no
	 * descriptors at all pfds is NULL, and after the last descriptor
	 * j is one past the end of the vector.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		events = 0;
		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = pollts(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  This is also
	 * done on timeout, where select() must hand back empty sets.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results (nothing is set on timeout) */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	free(pfds);
	return rv;
}
768
/*
 * Count how many entries of a pollfd vector refer to the rump kernel and
 * how many to the host.  The counters are incremented, not reset, so the
 * caller must initialize them.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx;

	for (idx = 0; idx < nfds; idx++) {
		int *counter = fd_isrump(fds[idx].fd) ? rumpcall : hostcall;

		(*counter)++;
	}
}
781
/*
 * Rewrite the fd member of every entry of a pollfd vector in place by
 * passing it through fdadj.  With nfds == 0 the vector is not touched.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	struct pollfd *cur = fds;
	nfds_t left = nfds;

	while (left-- > 0) {
		cur->fd = fdadj(cur->fd);
		cur++;
	}
}
791
/*
 * poll is easy as long as the fds in a call all belong to one kernel.
 * otherwise it is quite tricky...
 *
 * struct pollarg carries the arguments for the host half of such a
 * mixed poll to the helper thread (see hostpoll()).
 */
struct pollarg {
	struct pollfd *pfds;		/* vector to poll in the host */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout, passed through */
	const sigset_t *sigmask;	/* signal mask, passed through */
	int pipefd;			/* rump kernel fd written when done */
	int errnum;			/* errno of the host poll if it failed */
};
804
805 static void *
806 hostpoll(void *arg)
807 {
808 struct pollarg *parg = arg;
809 intptr_t rv;
810
811 rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
812 if (rv == -1)
813 parg->errnum = errno;
814 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
815
816 return (void *)(intptr_t)rv;
817 }
818
819 int
820 pollts(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
821 const sigset_t *sigmask)
822 {
823 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
824 const sigset_t *);
825 int hostcall = 0, rumpcall = 0;
826 pthread_t pt;
827 nfds_t i;
828 int rv;
829
830 DPRINTF(("poll\n"));
831 checkpoll(fds, nfds, &hostcall, &rumpcall);
832
833 if (hostcall && rumpcall) {
834 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
835 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
836 struct pollarg parg;
837 uintptr_t lrv;
838 int sverrno = 0, trv;
839
840 /*
841 * ok, this is where it gets tricky. We must support
842 * this since it's a very common operation in certain
843 * types of software (telnet, netcat, etc). We allocate
844 * two vectors and run two poll commands in separate
845 * threads. Whichever returns first "wins" and the
846 * other kernel's fds won't show activity.
847 */
848 rv = -1;
849
850 /* allocate full vector for O(n) joining after call */
851 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
852 if (!pfd_host)
853 goto out;
854 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
855 if (!pfd_rump) {
856 goto out;
857 }
858
859 /* split vectors */
860 for (i = 0; i < nfds; i++) {
861 if (fds[i].fd == -1) {
862 pfd_host[i].fd = -1;
863 pfd_rump[i].fd = -1;
864 } else if (fd_isrump(fds[i].fd)) {
865 pfd_host[i].fd = -1;
866 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
867 pfd_rump[i].events = fds[i].events;
868 } else {
869 pfd_rump[i].fd = -1;
870 pfd_host[i].fd = fds[i].fd;
871 pfd_host[i].events = fds[i].events;
872 }
873 }
874
875 /*
876 * then, open two pipes, one for notifications
877 * to each kernel.
878 */
879 if (rump_sys_pipe(rpipe) == -1)
880 goto out;
881 if (pipe(hpipe) == -1)
882 goto out;
883
884 pfd_host[nfds].fd = hpipe[0];
885 pfd_host[nfds].events = POLLIN;
886 pfd_rump[nfds].fd = rpipe[0];
887 pfd_rump[nfds].events = POLLIN;
888
889 /*
890 * then, create a thread to do host part and meanwhile
891 * do rump kernel part right here
892 */
893
894 parg.pfds = pfd_host;
895 parg.nfds = nfds+1;
896 parg.ts = ts;
897 parg.sigmask = sigmask;
898 parg.pipefd = rpipe[1];
899 pthread_create(&pt, NULL, hostpoll, &parg);
900
901 op_pollts = rumpcalls[RUMPCALL_POLLTS];
902 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
903 sverrno = errno;
904 write(hpipe[1], &rv, sizeof(rv));
905 pthread_join(pt, (void *)&trv);
906
907 /* check who "won" and merge results */
908 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
909 rv = trv;
910
911 for (i = 0; i < nfds; i++) {
912 if (pfd_rump[i].fd != -1)
913 fds[i].revents = pfd_rump[i].revents;
914 }
915 sverrno = parg.errnum;
916 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
917 rv = trv;
918
919 for (i = 0; i < nfds; i++) {
920 if (pfd_host[i].fd != -1)
921 fds[i].revents = pfd_host[i].revents;
922 }
923 } else {
924 rv = 0;
925 }
926
927 out:
928 if (rpipe[0] != -1)
929 rump_sys_close(rpipe[0]);
930 if (rpipe[1] != -1)
931 rump_sys_close(rpipe[1]);
932 if (hpipe[0] != -1)
933 host_close(hpipe[0]);
934 if (hpipe[1] != -1)
935 host_close(hpipe[1]);
936 free(pfd_host);
937 free(pfd_rump);
938 errno = sverrno;
939 } else {
940 if (hostcall) {
941 op_pollts = host_pollts;
942 } else {
943 op_pollts = rumpcalls[RUMPCALL_POLLTS];
944 adjustpoll(fds, nfds, fd_host2rump);
945 }
946
947 rv = op_pollts(fds, nfds, ts, sigmask);
948 if (rumpcall)
949 adjustpoll(fds, nfds, fd_rump2host);
950 }
951
952 return rv;
953 }
954
955 int
956 poll(struct pollfd *fds, nfds_t nfds, int timeout)
957 {
958 struct timespec ts;
959 struct timespec *tsp = NULL;
960
961 if (timeout != INFTIM) {
962 ts.tv_sec = timeout / 1000;
963 ts.tv_nsec = (timeout % 1000) * 1000;
964
965 tsp = &ts;
966 }
967
968 return pollts(fds, nfds, tsp, NULL);
969 }
970
/*
 * kqueue(2) is not supported by the hijack layer: any attempt to use it
 * terminates the process via abort().
 */
int
kqueue(void)
{

	/* not implemented */
	abort();
}
977
/*
 * kevent(2) is not supported by the hijack layer: any attempt to use it
 * terminates the process via abort().  None of the arguments is looked
 * at.
 */
int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{

	/* not implemented */
	abort();
}
986