hijack.c revision 1.15 1 /* $NetBSD: hijack.c,v 1.15 2011/01/18 23:43:21 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.15 2011/01/18 23:43:21 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/event.h>
34 #include <sys/ioctl.h>
35 #include <sys/socket.h>
36 #include <sys/poll.h>
37
38 #include <rump/rumpclient.h>
39 #include <rump/rump_syscalls.h>
40
41 #include <assert.h>
42 #include <dlfcn.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <poll.h>
47 #include <pthread.h>
48 #include <signal.h>
49 #include <stdarg.h>
50 #include <stdbool.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <time.h>
54 #include <unistd.h>
55
/*
 * Indices of the system calls which can be directed to the rump kernel.
 * The same index selects the symbol name in sysnames[] and the resolved
 * entry point in rumpcalls[].  RUMPCALL__NUM is the number of entries.
 */
enum {
	RUMPCALL_SOCKET, RUMPCALL_ACCEPT, RUMPCALL_BIND, RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME, RUMPCALL_GETSOCKNAME, RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM, RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO, RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT, RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,
	RUMPCALL_READ, RUMPCALL_READV,
	RUMPCALL_WRITE, RUMPCALL_WRITEV,
	RUMPCALL_IOCTL, RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,
	RUMPCALL__NUM
};

/*
 * RSYS_NAME(X) is the string form of whatever RUMP_SYS_RENAME_X expands
 * to (presumably supplied by <rump/rump_syscalls.h> -- confirm).
 */
#define RSYS_STRING(a) __STRING(a)
#define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))

/*
 * Symbol names looked up in librumpclient, one per RUMPCALL_* index.
 * Designated initializers tie every name to its enum constant, so
 * reordering either list can no longer silently pair an index with the
 * wrong symbol.
 */
const char *sysnames[RUMPCALL__NUM] = {
	[RUMPCALL_SOCKET]	= RSYS_NAME(SOCKET),
	[RUMPCALL_ACCEPT]	= RSYS_NAME(ACCEPT),
	[RUMPCALL_BIND]		= RSYS_NAME(BIND),
	[RUMPCALL_CONNECT]	= RSYS_NAME(CONNECT),
	[RUMPCALL_GETPEERNAME]	= RSYS_NAME(GETPEERNAME),
	[RUMPCALL_GETSOCKNAME]	= RSYS_NAME(GETSOCKNAME),
	[RUMPCALL_LISTEN]	= RSYS_NAME(LISTEN),
	[RUMPCALL_RECVFROM]	= RSYS_NAME(RECVFROM),
	[RUMPCALL_RECVMSG]	= RSYS_NAME(RECVMSG),
	[RUMPCALL_SENDTO]	= RSYS_NAME(SENDTO),
	[RUMPCALL_SENDMSG]	= RSYS_NAME(SENDMSG),
	[RUMPCALL_GETSOCKOPT]	= RSYS_NAME(GETSOCKOPT),
	[RUMPCALL_SETSOCKOPT]	= RSYS_NAME(SETSOCKOPT),
	[RUMPCALL_SHUTDOWN]	= RSYS_NAME(SHUTDOWN),
	[RUMPCALL_READ]		= RSYS_NAME(READ),
	[RUMPCALL_READV]	= RSYS_NAME(READV),
	[RUMPCALL_WRITE]	= RSYS_NAME(WRITE),
	[RUMPCALL_WRITEV]	= RSYS_NAME(WRITEV),
	[RUMPCALL_IOCTL]	= RSYS_NAME(IOCTL),
	[RUMPCALL_FCNTL]	= RSYS_NAME(FCNTL),
	[RUMPCALL_CLOSE]	= RSYS_NAME(CLOSE),
	[RUMPCALL_POLLTS]	= RSYS_NAME(POLLTS),
};
97
98 static int (*host_socket)(int, int, int);
99 static int (*host_connect)(int, const struct sockaddr *, socklen_t);
100 static int (*host_bind)(int, const struct sockaddr *, socklen_t);
101 static int (*host_listen)(int, int);
102 static int (*host_accept)(int, struct sockaddr *, socklen_t *);
103 static int (*host_getpeername)(int, struct sockaddr *, socklen_t *);
104 static int (*host_getsockname)(int, struct sockaddr *, socklen_t *);
105 static int (*host_setsockopt)(int, int, int, const void *, socklen_t);
106
107 static ssize_t (*host_read)(int, void *, size_t);
108 static ssize_t (*host_readv)(int, const struct iovec *, int);
109 static ssize_t (*host_write)(int, const void *, size_t);
110 static ssize_t (*host_writev)(int, const struct iovec *, int);
111 static int (*host_ioctl)(int, unsigned long, ...);
112 static int (*host_fcntl)(int, int, ...);
113 static int (*host_close)(int);
114 static int (*host_pollts)(struct pollfd *, nfds_t,
115 const struct timespec *, const sigset_t *);
116 static pid_t (*host_fork)(void);
117 static int (*host_dup2)(int, int);
118 static int (*host_shutdown)(int, int);
119
120 static void *rumpcalls[RUMPCALL__NUM];
121
122 /*
123 * Would be nice to get this automatically in sync with libc.
124 * Also, this does not work for compat-using binaries!
125 */
126
127 #if !__NetBSD_Prereq__(5,99,7)
128 #define SELECT select
129 #define POLLTS pollts
130 #define POLL poll
131 #else
132 #define SELECT __select50
133 #define POLLTS __pollts50
134 #define POLL __poll50
135
136 int SELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
137 int POLLTS(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *);
138 int POLL(struct pollfd *, nfds_t, int);
139 #endif
140
/*
 * dlsym() wrapper which is called from librumpclient in case of
 * LD_PRELOAD.  Performing the lookup from inside this library
 * ensures correct RTLD_NEXT.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
151
152 /* low calorie sockets? */
153 static bool hostlocalsockets = true;
154
155 static void __attribute__((constructor))
156 rcinit(void)
157 {
158 int (*rumpcinit)(void);
159 void **rumpcdlsym;
160 void *hand;
161 int i;
162
163 hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
164 if (!hand)
165 err(1, "cannot open librumpclient.so");
166 rumpcinit = dlsym(hand, "rumpclient_init");
167 _DIAGASSERT(rumpcinit);
168
169 rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
170 *rumpcdlsym = hijackdlsym;
171
172 host_socket = dlsym(RTLD_NEXT, "__socket30");
173 host_listen = dlsym(RTLD_NEXT, "listen");
174 host_connect = dlsym(RTLD_NEXT, "connect");
175 host_bind = dlsym(RTLD_NEXT, "bind");
176 host_accept = dlsym(RTLD_NEXT, "accept");
177 host_getpeername = dlsym(RTLD_NEXT, "getpeername");
178 host_getsockname = dlsym(RTLD_NEXT, "getsockname");
179 host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
180
181 host_read = dlsym(RTLD_NEXT, "read");
182 host_readv = dlsym(RTLD_NEXT, "readv");
183 host_write = dlsym(RTLD_NEXT, "write");
184 host_writev = dlsym(RTLD_NEXT, "writev");
185 host_ioctl = dlsym(RTLD_NEXT, "ioctl");
186 host_fcntl = dlsym(RTLD_NEXT, "fcntl");
187 host_close = dlsym(RTLD_NEXT, "close");
188 host_pollts = dlsym(RTLD_NEXT, "pollts");
189 host_fork = dlsym(RTLD_NEXT, "fork");
190 host_dup2 = dlsym(RTLD_NEXT, "dup2");
191 host_shutdown = dlsym(RTLD_NEXT, "shutdown");
192
193 for (i = 0; i < RUMPCALL__NUM; i++) {
194 rumpcalls[i] = dlsym(hand, sysnames[i]);
195 if (!rumpcalls[i]) {
196 fprintf(stderr, "rumphijack: cannot find symbol: %s\n",
197 sysnames[i]);
198 exit(1);
199 }
200 }
201
202 if (rumpcinit() == -1)
203 err(1, "rumpclient init");
204 }
205
/*
 * Bit n is set when host descriptor number n was produced by dup2()
 * from a rump kernel descriptor.
 */
static unsigned dup2mask;

/*
 * Test whether fd is marked in dup2mask.  Descriptors outside the range
 * representable in the mask are never marked; checking the range first
 * keeps the shift well defined for negative and large descriptor
 * numbers, both of which callers pass in.
 */
static inline int
isdup2d(int fd)
{

	if (fd < 0 || fd >= (int)(sizeof(dup2mask) * 8))
		return 0;
	return ((1U << fd) & dup2mask) != 0;
}
#define ISDUP2D(fd) isdup2d(fd)
208
/*
 * Debug tracing.  With DEBUGJACK defined, DPRINTF((fmt, ...)) prints to
 * stderr through mydprintf(); otherwise it expands to nothing.
 */
//#define DEBUGJACK
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	/* print only while stderr has not been dup2'd from a rump fd */
	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#endif
228
/* XXX: need runtime selection. low for now due to FD_SETSIZE */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */

/*
 * Convert a rump kernel descriptor to the number seen by the
 * application: -1 and dup2'd descriptors are returned unchanged,
 * everything else is shifted up by HIJACK_FDOFF.
 */
static int
fd_rump2host(int fd)
{

	if (fd == -1 || ISDUP2D(fd))
		return fd;

	return fd + HIJACK_FDOFF;
}
245
246 static int
247 fd_host2rump(int fd)
248 {
249
250 if (!ISDUP2D(fd))
251 fd -= HIJACK_FDOFF;
252 return fd;
253 }
254
255 static bool
256 fd_isrump(int fd)
257 {
258
259 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
260 }
261
262 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
263 #undef HIJACK_FDOFF
264
265 int __socket30(int, int, int);
266 int
267 __socket30(int domain, int type, int protocol)
268 {
269 int (*rc_socket)(int, int, int);
270 int fd;
271 bool dohost;
272
273 dohost = hostlocalsockets && (domain == AF_LOCAL);
274
275 if (dohost)
276 rc_socket = host_socket;
277 else
278 rc_socket = rumpcalls[RUMPCALL_SOCKET];
279 fd = rc_socket(domain, type, protocol);
280
281 if (!dohost)
282 fd = fd_rump2host(fd);
283 DPRINTF(("socket <- %d\n", fd));
284
285 return fd;
286 }
287
288 int
289 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
290 {
291 int (*rc_accept)(int, struct sockaddr *, socklen_t *);
292 int fd;
293 bool isrump;
294
295 isrump = fd_isrump(s);
296
297 DPRINTF(("accept -> %d", s));
298 if (isrump) {
299 rc_accept = rumpcalls[RUMPCALL_ACCEPT];
300 s = fd_host2rump(s);
301 } else {
302 rc_accept = host_accept;
303 }
304 fd = rc_accept(s, addr, addrlen);
305 if (fd != -1 && isrump)
306 fd = fd_rump2host(fd);
307
308 DPRINTF((" <- %d\n", fd));
309
310 return fd;
311 }
312
313 int
314 bind(int s, const struct sockaddr *name, socklen_t namelen)
315 {
316 int (*rc_bind)(int, const struct sockaddr *, socklen_t);
317
318 DPRINTF(("bind -> %d\n", s));
319 if (fd_isrump(s)) {
320 rc_bind = rumpcalls[RUMPCALL_BIND];
321 s = fd_host2rump(s);
322 } else {
323 rc_bind = host_bind;
324 }
325 return rc_bind(s, name, namelen);
326 }
327
328 int
329 connect(int s, const struct sockaddr *name, socklen_t namelen)
330 {
331 int (*rc_connect)(int, const struct sockaddr *, socklen_t);
332
333 DPRINTF(("connect -> %d\n", s));
334 if (fd_isrump(s)) {
335 rc_connect = rumpcalls[RUMPCALL_CONNECT];
336 s = fd_host2rump(s);
337 } else {
338 rc_connect = host_connect;
339 }
340
341 return rc_connect(s, name, namelen);
342 }
343
344 int
345 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
346 {
347 int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
348
349 DPRINTF(("getpeername -> %d\n", s));
350 if (fd_isrump(s)) {
351 rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
352 s = fd_host2rump(s);
353 } else {
354 rc_getpeername = host_getpeername;
355 }
356 return rc_getpeername(s, name, namelen);
357 }
358
359 int
360 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
361 {
362 int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
363
364 DPRINTF(("getsockname -> %d\n", s));
365 if (fd_isrump(s)) {
366 rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
367 s = fd_host2rump(s);
368 } else {
369 rc_getsockname = host_getsockname;
370 }
371 return rc_getsockname(s, name, namelen);
372 }
373
374 int
375 listen(int s, int backlog)
376 {
377 int (*rc_listen)(int, int);
378
379 DPRINTF(("listen -> %d\n", s));
380 if (fd_isrump(s)) {
381 rc_listen = rumpcalls[RUMPCALL_LISTEN];
382 s = fd_host2rump(s);
383 } else {
384 rc_listen = host_listen;
385 }
386 return rc_listen(s, backlog);
387 }
388
/* recv(2) override: recvfrom() without a source address. */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{
	ssize_t nrecv;

	nrecv = recvfrom(s, buf, len, flags, NULL, NULL);
	return nrecv;
}
395
396 ssize_t
397 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
398 socklen_t *fromlen)
399 {
400 int (*rc_recvfrom)(int, void *, size_t, int,
401 struct sockaddr *, socklen_t *);
402
403 DPRINTF(("recvfrom\n"));
404 assertfd(s);
405 rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
406 return rc_recvfrom(fd_host2rump(s), buf, len, flags, from, fromlen);
407 }
408
409 ssize_t
410 recvmsg(int s, struct msghdr *msg, int flags)
411 {
412 int (*rc_recvmsg)(int, struct msghdr *, int);
413
414 DPRINTF(("recvmsg\n"));
415 assertfd(s);
416 rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
417 return rc_recvmsg(fd_host2rump(s), msg, flags);
418 }
419
/* send(2) override: sendto() without a destination address. */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{
	ssize_t nsent;

	nsent = sendto(s, buf, len, flags, NULL, 0);
	return nsent;
}
426
427 ssize_t
428 sendto(int s, const void *buf, size_t len, int flags,
429 const struct sockaddr *to, socklen_t tolen)
430 {
431 int (*rc_sendto)(int, const void *, size_t, int,
432 const struct sockaddr *, socklen_t);
433
434 if (s == -1)
435 return len;
436
437 DPRINTF(("sendto\n"));
438 assertfd(s);
439 rc_sendto = rumpcalls[RUMPCALL_SENDTO];
440 return rc_sendto(fd_host2rump(s), buf, len, flags, to, tolen);
441 }
442
443 ssize_t
444 sendmsg(int s, const struct msghdr *msg, int flags)
445 {
446 int (*rc_sendmsg)(int, const struct msghdr *, int);
447
448 DPRINTF(("sendmsg\n"));
449 assertfd(s);
450 rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
451 return rc_sendmsg(fd_host2rump(s), msg, flags);
452 }
453
454 int
455 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
456 {
457 int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
458
459 DPRINTF(("getsockopt -> %d\n", s));
460 assertfd(s);
461 rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
462 return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
463 }
464
465 int
466 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
467 {
468 int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
469
470 DPRINTF(("setsockopt -> %d\n", s));
471 if (fd_isrump(s)) {
472 rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
473 s = fd_host2rump(s);
474 } else {
475 rc_setsockopt = host_setsockopt;
476 }
477 return rc_setsockopt(s, level, optname, optval, optlen);
478 }
479
480 int
481 shutdown(int s, int how)
482 {
483 int (*rc_shutdown)(int, int);
484
485 DPRINTF(("shutdown -> %d\n", s));
486 if (fd_isrump(s)) {
487 rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
488 s = fd_host2rump(s);
489 } else {
490 rc_shutdown = host_shutdown;
491 }
492 return rc_shutdown(s, how);
493 }
494
495 /*
496 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
497 * many programs do that. dup2 of a rump kernel fd to another value
498 * not >= fdoff is an error.
499 *
500 * Note: cannot rump2host newd, because it is often hardcoded.
501 *
502 * XXX: should disable debug prints after stdout/stderr are dup2'd
503 */
504 int
505 dup2(int oldd, int newd)
506 {
507 int rv;
508
509 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
510
511 if (fd_isrump(oldd)) {
512 if (!(newd >= 0 && newd <= 2))
513 return EBADF;
514 oldd = fd_host2rump(oldd);
515 rv = rump_sys_dup2(oldd, newd);
516 if (rv != -1)
517 dup2mask |= 1<<newd;
518 } else {
519 rv = host_dup2(oldd, newd);
520 }
521
522 return rv;
523 }
524
/*
 * We just wrap fork with the appropriate rump client calls to preserve
 * the file descriptors of the forked parent in the child, but
 * prevent double use of the connection fd.
 *
 * Returns what the host fork returns, except that -1 is returned when
 * rumpclient_prefork() fails (no fork is attempted) and, in the child,
 * when rumpclient_fork_init() fails.
 *
 * Defined with a (void) parameter list so the definition is a proper
 * prototype.
 */

pid_t
fork(void)
{
	struct rumpclient_fork *rf;
	pid_t rv;

	DPRINTF(("fork\n"));

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch ((rv = host_fork())) {
	case -1:
		/* XXX: cancel rf */
		break;
	case 0:
		/* child: set up the rump client state from rf */
		if (rumpclient_fork_init(rf) == -1)
			rv = -1;
		break;
	default:
		/* parent: nothing to do */
		break;
	}

	DPRINTF(("fork returns %d\n", rv));
	return rv;
}
557
558 /*
559 * Hybrids
560 */
561
562 ssize_t
563 read(int fd, void *buf, size_t len)
564 {
565 ssize_t (*op_read)(int, void *, size_t);
566 ssize_t n;
567
568 DPRINTF(("read %d\n", fd));
569 if (fd_isrump(fd)) {
570 fd = fd_host2rump(fd);
571 op_read = rumpcalls[RUMPCALL_READ];
572 } else {
573 op_read = host_read;
574 }
575
576 n = op_read(fd, buf, len);
577 return n;
578 }
579
580 ssize_t
581 readv(int fd, const struct iovec *iov, int iovcnt)
582 {
583 ssize_t (*op_readv)(int, const struct iovec *, int);
584
585 DPRINTF(("readv %d\n", fd));
586 if (fd_isrump(fd)) {
587 fd = fd_host2rump(fd);
588 op_readv = rumpcalls[RUMPCALL_READV];
589 } else {
590 op_readv = host_readv;
591 }
592
593 return op_readv(fd, iov, iovcnt);
594 }
595
596 ssize_t
597 write(int fd, const void *buf, size_t len)
598 {
599 ssize_t (*op_write)(int, const void *, size_t);
600
601 if (fd_isrump(fd)) {
602 fd = fd_host2rump(fd);
603 op_write = rumpcalls[RUMPCALL_WRITE];
604 } else {
605 op_write = host_write;
606 }
607
608 return op_write(fd, buf, len);
609 }
610
611 ssize_t
612 writev(int fd, const struct iovec *iov, int iovcnt)
613 {
614 ssize_t (*op_writev)(int, const struct iovec *, int);
615
616 DPRINTF(("writev %d\n", fd));
617 if (fd_isrump(fd)) {
618 fd = fd_host2rump(fd);
619 op_writev = rumpcalls[RUMPCALL_WRITEV];
620 } else {
621 op_writev = host_writev;
622 }
623
624 return op_writev(fd, iov, iovcnt);
625 }
626
627 int
628 ioctl(int fd, unsigned long cmd, ...)
629 {
630 int (*op_ioctl)(int, unsigned long cmd, ...);
631 va_list ap;
632 int rv;
633
634 DPRINTF(("ioctl\n"));
635 if (fd_isrump(fd)) {
636 fd = fd_host2rump(fd);
637 op_ioctl = rumpcalls[RUMPCALL_IOCTL];
638 } else {
639 op_ioctl = host_ioctl;
640 }
641
642 va_start(ap, cmd);
643 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
644 va_end(ap);
645 return rv;
646 }
647
648 int
649 fcntl(int fd, int cmd, ...)
650 {
651 int (*op_fcntl)(int, int, ...);
652 va_list ap;
653 int rv;
654
655 DPRINTF(("fcntl\n"));
656 if (fd_isrump(fd)) {
657 fd = fd_host2rump(fd);
658 op_fcntl = rumpcalls[RUMPCALL_FCNTL];
659 } else {
660 op_fcntl = host_fcntl;
661 }
662
663 va_start(ap, cmd);
664 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
665 va_end(ap);
666 return rv;
667 }
668
669 int
670 close(int fd)
671 {
672 int (*op_close)(int);
673
674 DPRINTF(("close %d\n", fd));
675 if (fd_isrump(fd)) {
676 fd = fd_host2rump(fd);
677 op_close = rumpcalls[RUMPCALL_CLOSE];
678 } else {
679 op_close = host_close;
680 }
681
682 return op_close(fd);
683 }
684
/*
 * select(2) override, implemented on top of pollts().
 *
 * The descriptor sets are converted to a pollfd vector (read -> POLLIN,
 * write -> POLLOUT, except -> POLLHUP|POLLERR), polled, and the results
 * are converted back into the caller's sets.
 *
 * Returns the value returned by pollts(): -1 on error (sets are left
 * untouched), 0 on timeout (sets are cleared), otherwise the number of
 * pollfd entries with events.  A negative nfds fails with EINVAL.
 */
int
SELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t j, realnfds = 0;
	short events;
	int i, rv;

	DPRINTF(("select\n"));

	if (nfds < 0) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_open_fds.
	 * Seems to be fixed in current, thank the maker.
	 */
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill the vector.  A slot is touched only for descriptors that
	 * are present in some set, so nothing is written when there are
	 * no descriptors at all (pfds == NULL) or past the last slot.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		events = 0;
		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = pollts(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  This is done
	 * for a timeout (rv == 0) too, so that the caller does not see
	 * its input sets reported as ready descriptors.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	free(pfds);
	return rv;
}
793
/*
 * Count how many entries of a poll vector refer to rump kernel
 * descriptors and how many to host descriptors.  Entries with fd -1
 * are skipped.  The counters are incremented, not reset.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	struct pollfd *pfd = fds;
	nfds_t left;

	for (left = nfds; left > 0; left--, pfd++) {
		if (pfd->fd == -1)
			continue;

		if (fd_isrump(pfd->fd))
			*rumpcall += 1;
		else
			*hostcall += 1;
	}
}
809
/*
 * Rewrite the descriptor of every entry of a poll vector, in place,
 * with the value the translation function fdadj returns for it.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	struct pollfd *pfd = fds;
	nfds_t left;

	for (left = nfds; left > 0; left--, pfd++)
		pfd->fd = fdadj(pfd->fd);
}
819
820 /*
821 * poll is easy as long as the call comes in the fds only in one
822 * kernel. otherwise its quite tricky...
823 */
824 struct pollarg {
825 struct pollfd *pfds;
826 nfds_t nfds;
827 const struct timespec *ts;
828 const sigset_t *sigmask;
829 int pipefd;
830 int errnum;
831 };
832
833 static void *
834 hostpoll(void *arg)
835 {
836 struct pollarg *parg = arg;
837 intptr_t rv;
838
839 rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
840 if (rv == -1)
841 parg->errnum = errno;
842 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
843
844 return (void *)(intptr_t)rv;
845 }
846
847 int
848 POLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
849 const sigset_t *sigmask)
850 {
851 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
852 const sigset_t *);
853 int hostcall = 0, rumpcall = 0;
854 pthread_t pt;
855 nfds_t i;
856 int rv;
857
858 DPRINTF(("poll\n"));
859 checkpoll(fds, nfds, &hostcall, &rumpcall);
860
861 if (hostcall && rumpcall) {
862 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
863 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
864 struct pollarg parg;
865 uintptr_t lrv;
866 int sverrno = 0, trv;
867
868 /*
869 * ok, this is where it gets tricky. We must support
870 * this since it's a very common operation in certain
871 * types of software (telnet, netcat, etc). We allocate
872 * two vectors and run two poll commands in separate
873 * threads. Whichever returns first "wins" and the
874 * other kernel's fds won't show activity.
875 */
876 rv = -1;
877
878 /* allocate full vector for O(n) joining after call */
879 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
880 if (!pfd_host)
881 goto out;
882 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
883 if (!pfd_rump) {
884 goto out;
885 }
886
887 /* split vectors */
888 for (i = 0; i < nfds; i++) {
889 if (fds[i].fd == -1) {
890 pfd_host[i].fd = -1;
891 pfd_rump[i].fd = -1;
892 } else if (fd_isrump(fds[i].fd)) {
893 pfd_host[i].fd = -1;
894 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
895 pfd_rump[i].events = fds[i].events;
896 } else {
897 pfd_rump[i].fd = -1;
898 pfd_host[i].fd = fds[i].fd;
899 pfd_host[i].events = fds[i].events;
900 }
901 fds[i].revents = 0;
902 }
903
904 /*
905 * then, open two pipes, one for notifications
906 * to each kernel.
907 */
908 if (rump_sys_pipe(rpipe) == -1)
909 goto out;
910 if (pipe(hpipe) == -1)
911 goto out;
912
913 pfd_host[nfds].fd = hpipe[0];
914 pfd_host[nfds].events = POLLIN;
915 pfd_rump[nfds].fd = rpipe[0];
916 pfd_rump[nfds].events = POLLIN;
917
918 /*
919 * then, create a thread to do host part and meanwhile
920 * do rump kernel part right here
921 */
922
923 parg.pfds = pfd_host;
924 parg.nfds = nfds+1;
925 parg.ts = ts;
926 parg.sigmask = sigmask;
927 parg.pipefd = rpipe[1];
928 pthread_create(&pt, NULL, hostpoll, &parg);
929
930 op_pollts = rumpcalls[RUMPCALL_POLLTS];
931 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
932 sverrno = errno;
933 write(hpipe[1], &rv, sizeof(rv));
934 pthread_join(pt, (void *)&trv);
935
936 /* check who "won" and merge results */
937 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
938 rv = trv;
939
940 for (i = 0; i < nfds; i++) {
941 if (pfd_rump[i].fd != -1)
942 fds[i].revents = pfd_rump[i].revents;
943 }
944 sverrno = parg.errnum;
945 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
946 rv = trv;
947
948 for (i = 0; i < nfds; i++) {
949 if (pfd_host[i].fd != -1)
950 fds[i].revents = pfd_host[i].revents;
951 }
952 } else {
953 rv = 0;
954 }
955
956 out:
957 if (rpipe[0] != -1)
958 rump_sys_close(rpipe[0]);
959 if (rpipe[1] != -1)
960 rump_sys_close(rpipe[1]);
961 if (hpipe[0] != -1)
962 host_close(hpipe[0]);
963 if (hpipe[1] != -1)
964 host_close(hpipe[1]);
965 free(pfd_host);
966 free(pfd_rump);
967 errno = sverrno;
968 } else {
969 if (hostcall) {
970 op_pollts = host_pollts;
971 } else {
972 op_pollts = rumpcalls[RUMPCALL_POLLTS];
973 adjustpoll(fds, nfds, fd_host2rump);
974 }
975
976 rv = op_pollts(fds, nfds, ts, sigmask);
977 if (rumpcall)
978 adjustpoll(fds, nfds, fd_rump2host);
979 }
980
981 return rv;
982 }
983
984 int
985 POLL(struct pollfd *fds, nfds_t nfds, int timeout)
986 {
987 struct timespec ts;
988 struct timespec *tsp = NULL;
989
990 if (timeout != INFTIM) {
991 ts.tv_sec = timeout / 1000;
992 ts.tv_nsec = (timeout % 1000) * 1000*1000;
993
994 tsp = &ts;
995 }
996
997 return pollts(fds, nfds, tsp, NULL);
998 }
999
/* kqueue(2) override: no implementation here, the process is aborted. */
int
kqueue(void)
{

	abort();
	/* NOTREACHED */
}
1006
/* kevent(2) override: no implementation here, the process is aborted. */
int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{

	abort();
	/* NOTREACHED */
}
1015