hijack.c revision 1.16 1 /* $NetBSD: hijack.c,v 1.16 2011/01/19 11:27:01 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.16 2011/01/19 11:27:01 pooka Exp $");
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/event.h>
34 #include <sys/ioctl.h>
35 #include <sys/socket.h>
36 #include <sys/poll.h>
37
38 #include <rump/rumpclient.h>
39 #include <rump/rump_syscalls.h>
40
41 #include <assert.h>
42 #include <dlfcn.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <poll.h>
47 #include <pthread.h>
48 #include <signal.h>
49 #include <stdarg.h>
50 #include <stdbool.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <time.h>
54 #include <unistd.h>
55
/*
 * Indices of the hijacked system calls.  rcinit() fills rumpcalls[]
 * by walking sysnames[] with these values, so the order here must
 * match the order of sysnames[].
 */
enum {
	/* socket calls */
	RUMPCALL_SOCKET,
	RUMPCALL_ACCEPT,
	RUMPCALL_BIND,
	RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME,
	RUMPCALL_GETSOCKNAME,
	RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM,
	RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO,
	RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT,
	RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,
	/* generic descriptor calls */
	RUMPCALL_READ,
	RUMPCALL_READV,
	RUMPCALL_WRITE,
	RUMPCALL_WRITEV,
	RUMPCALL_IOCTL,
	RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,
	/* number of entries, not a call */
	RUMPCALL__NUM
};
69
/*
 * RSYS_NAME(X) yields the string form of whatever RUMP_SYS_RENAME_X
 * expands to.  The extra RSYS_STRING level forces macro expansion of
 * the pasted token before it is stringified.
 */
#define RSYS_STRING(sym) __STRING(sym)
#define RSYS_NAME(call) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,call))

/*
 * Symbol names looked up in librumpclient by rcinit(), one per
 * RUMPCALL_* index and in the same order.
 */
const char *sysnames[] = {
	/* socket calls */
	RSYS_NAME(SOCKET),
	RSYS_NAME(ACCEPT),
	RSYS_NAME(BIND),
	RSYS_NAME(CONNECT),
	RSYS_NAME(GETPEERNAME),
	RSYS_NAME(GETSOCKNAME),
	RSYS_NAME(LISTEN),
	RSYS_NAME(RECVFROM),
	RSYS_NAME(RECVMSG),
	RSYS_NAME(SENDTO),
	RSYS_NAME(SENDMSG),
	RSYS_NAME(GETSOCKOPT),
	RSYS_NAME(SETSOCKOPT),
	RSYS_NAME(SHUTDOWN),

	/* generic descriptor calls */
	RSYS_NAME(READ),
	RSYS_NAME(READV),
	RSYS_NAME(WRITE),
	RSYS_NAME(WRITEV),
	RSYS_NAME(IOCTL),
	RSYS_NAME(FCNTL),
	RSYS_NAME(CLOSE),
	RSYS_NAME(POLLTS),
};
97
98 static int (*host_socket)(int, int, int);
99 static int (*host_connect)(int, const struct sockaddr *, socklen_t);
100 static int (*host_bind)(int, const struct sockaddr *, socklen_t);
101 static int (*host_listen)(int, int);
102 static int (*host_accept)(int, struct sockaddr *, socklen_t *);
103 static int (*host_getpeername)(int, struct sockaddr *, socklen_t *);
104 static int (*host_getsockname)(int, struct sockaddr *, socklen_t *);
105 static int (*host_setsockopt)(int, int, int, const void *, socklen_t);
106
107 static ssize_t (*host_read)(int, void *, size_t);
108 static ssize_t (*host_readv)(int, const struct iovec *, int);
109 static ssize_t (*host_write)(int, const void *, size_t);
110 static ssize_t (*host_writev)(int, const struct iovec *, int);
111 static int (*host_ioctl)(int, unsigned long, ...);
112 static int (*host_fcntl)(int, int, ...);
113 static int (*host_close)(int);
114 static int (*host_pollts)(struct pollfd *, nfds_t,
115 const struct timespec *, const sigset_t *);
116 static pid_t (*host_fork)(void);
117 static int (*host_dup2)(int, int);
118 static int (*host_shutdown)(int, int);
119 /* XXX */
120 static void *host_sendto;
121 static void *host_recvfrom;
122
123 static void *rumpcalls[RUMPCALL__NUM];
124
125 /*
126 * Would be nice to get this automatically in sync with libc.
127 * Also, this does not work for compat-using binaries!
128 */
129
130 #if !__NetBSD_Prereq__(5,99,7)
131 #define SELECT select
132 #define POLLTS pollts
133 #define POLL poll
134 #else
135 #define SELECT __select50
136 #define POLLTS __pollts50
137 #define POLL __poll50
138
139 int SELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
140 int POLLTS(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *);
141 int POLL(struct pollfd *, nfds_t, int);
142 #endif
143
/*
 * dlsym() trampoline.  rcinit() stores its address through
 * librumpclient's "rumpclient_dlsym" pointer so that, in the
 * LD_PRELOAD case, lookups made by librumpclient are issued from this
 * library and RTLD_NEXT resolves correctly.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
154
/*
 * low calorie sockets?
 * When true, __socket30() creates AF_LOCAL sockets with the host's
 * socket call instead of the rump kernel's.
 */
static bool hostlocalsockets = true;
157
158 static void __attribute__((constructor))
159 rcinit(void)
160 {
161 int (*rumpcinit)(void);
162 void **rumpcdlsym;
163 void *hand;
164 int i;
165
166 hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
167 if (!hand)
168 err(1, "cannot open librumpclient.so");
169 rumpcinit = dlsym(hand, "rumpclient_init");
170 _DIAGASSERT(rumpcinit);
171
172 rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
173 *rumpcdlsym = hijackdlsym;
174
175 host_socket = dlsym(RTLD_NEXT, "__socket30");
176 host_listen = dlsym(RTLD_NEXT, "listen");
177 host_connect = dlsym(RTLD_NEXT, "connect");
178 host_bind = dlsym(RTLD_NEXT, "bind");
179 host_accept = dlsym(RTLD_NEXT, "accept");
180 host_getpeername = dlsym(RTLD_NEXT, "getpeername");
181 host_getsockname = dlsym(RTLD_NEXT, "getsockname");
182 host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
183
184 host_read = dlsym(RTLD_NEXT, "read");
185 host_readv = dlsym(RTLD_NEXT, "readv");
186 host_write = dlsym(RTLD_NEXT, "write");
187 host_writev = dlsym(RTLD_NEXT, "writev");
188 host_ioctl = dlsym(RTLD_NEXT, "ioctl");
189 host_fcntl = dlsym(RTLD_NEXT, "fcntl");
190 host_close = dlsym(RTLD_NEXT, "close");
191 host_pollts = dlsym(RTLD_NEXT, "pollts");
192 host_fork = dlsym(RTLD_NEXT, "fork");
193 host_dup2 = dlsym(RTLD_NEXT, "dup2");
194 host_shutdown = dlsym(RTLD_NEXT, "shutdown");
195 host_sendto = dlsym(RTLD_NEXT, "sendto");
196 host_recvfrom = dlsym(RTLD_NEXT, "recvfrom");
197
198 for (i = 0; i < RUMPCALL__NUM; i++) {
199 rumpcalls[i] = dlsym(hand, sysnames[i]);
200 if (!rumpcalls[i]) {
201 fprintf(stderr, "rumphijack: cannot find symbol: %s\n",
202 sysnames[i]);
203 exit(1);
204 }
205 }
206
207 if (rumpcinit() == -1)
208 err(1, "rumpclient init");
209 }
210
/*
 * Bitmask of host descriptor numbers onto which a rump kernel
 * descriptor has been dup2()'d; bit n set means fd n is such a
 * descriptor.
 */
static unsigned dup2mask;

/*
 * True iff fd is recorded in dup2mask.  The range check keeps the
 * shift defined: descriptors that are negative or do not fit in the
 * mask are never dup2'd.  Note that fd is evaluated more than once.
 */
#define ISDUP2D(fd) \
    ((fd) >= 0 && (fd) < (int)(sizeof(dup2mask) * 8) && \
    ((1U << (fd)) & dup2mask) != 0)
213
/*
 * Debug tracing.  With DEBUGJACK defined, DPRINTF((fmt, ...)) prints
 * to stderr through mydprintf(); otherwise DPRINTF() expands to
 * nothing and its argument is not evaluated.
 */
/* #define DEBUGJACK */
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x

/* printf-style trace to stderr; silent when fd 2 is marked dup2'd */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#endif
233
/*
 * Descriptor numbering.  fd_rump2host() adds HIJACK_FDOFF to a rump
 * kernel descriptor to form the number handed to the application, and
 * fd_isrump() treats any descriptor >= HIJACK_FDOFF as a rump one.
 * HIJACK_ASSERT is the threshold used by assertfd().
 *
 * XXX: need runtime selection. low for now due to FD_SETSIZE
 */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */
238 static int
239 fd_rump2host(int fd)
240 {
241
242 if (fd == -1)
243 return fd;
244
245 if (!ISDUP2D(fd))
246 fd += HIJACK_FDOFF;
247
248 return fd;
249 }
250
251 static int
252 fd_host2rump(int fd)
253 {
254
255 if (!ISDUP2D(fd))
256 fd -= HIJACK_FDOFF;
257 return fd;
258 }
259
260 static bool
261 fd_isrump(int fd)
262 {
263
264 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
265 }
266
/*
 * assert() that _fd_ is a rump kernel descriptor, i.e. dup2'd or at
 * least HIJACK_ASSERT.  Used by the wrappers that have no host
 * fallback.
 */
#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
/* HIJACK_FDOFF is not available to code past this point */
#undef HIJACK_FDOFF
269
270 int __socket30(int, int, int);
271 int
272 __socket30(int domain, int type, int protocol)
273 {
274 int (*rc_socket)(int, int, int);
275 int fd;
276 bool dohost;
277
278 dohost = hostlocalsockets && (domain == AF_LOCAL);
279
280 if (dohost)
281 rc_socket = host_socket;
282 else
283 rc_socket = rumpcalls[RUMPCALL_SOCKET];
284 fd = rc_socket(domain, type, protocol);
285
286 if (!dohost)
287 fd = fd_rump2host(fd);
288 DPRINTF(("socket <- %d\n", fd));
289
290 return fd;
291 }
292
293 int
294 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
295 {
296 int (*rc_accept)(int, struct sockaddr *, socklen_t *);
297 int fd;
298 bool isrump;
299
300 isrump = fd_isrump(s);
301
302 DPRINTF(("accept -> %d", s));
303 if (isrump) {
304 rc_accept = rumpcalls[RUMPCALL_ACCEPT];
305 s = fd_host2rump(s);
306 } else {
307 rc_accept = host_accept;
308 }
309 fd = rc_accept(s, addr, addrlen);
310 if (fd != -1 && isrump)
311 fd = fd_rump2host(fd);
312
313 DPRINTF((" <- %d\n", fd));
314
315 return fd;
316 }
317
318 int
319 bind(int s, const struct sockaddr *name, socklen_t namelen)
320 {
321 int (*rc_bind)(int, const struct sockaddr *, socklen_t);
322
323 DPRINTF(("bind -> %d\n", s));
324 if (fd_isrump(s)) {
325 rc_bind = rumpcalls[RUMPCALL_BIND];
326 s = fd_host2rump(s);
327 } else {
328 rc_bind = host_bind;
329 }
330 return rc_bind(s, name, namelen);
331 }
332
333 int
334 connect(int s, const struct sockaddr *name, socklen_t namelen)
335 {
336 int (*rc_connect)(int, const struct sockaddr *, socklen_t);
337
338 DPRINTF(("connect -> %d\n", s));
339 if (fd_isrump(s)) {
340 rc_connect = rumpcalls[RUMPCALL_CONNECT];
341 s = fd_host2rump(s);
342 } else {
343 rc_connect = host_connect;
344 }
345
346 return rc_connect(s, name, namelen);
347 }
348
349 int
350 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
351 {
352 int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
353
354 DPRINTF(("getpeername -> %d\n", s));
355 if (fd_isrump(s)) {
356 rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
357 s = fd_host2rump(s);
358 } else {
359 rc_getpeername = host_getpeername;
360 }
361 return rc_getpeername(s, name, namelen);
362 }
363
364 int
365 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
366 {
367 int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
368
369 DPRINTF(("getsockname -> %d\n", s));
370 if (fd_isrump(s)) {
371 rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
372 s = fd_host2rump(s);
373 } else {
374 rc_getsockname = host_getsockname;
375 }
376 return rc_getsockname(s, name, namelen);
377 }
378
379 int
380 listen(int s, int backlog)
381 {
382 int (*rc_listen)(int, int);
383
384 DPRINTF(("listen -> %d\n", s));
385 if (fd_isrump(s)) {
386 rc_listen = rumpcalls[RUMPCALL_LISTEN];
387 s = fd_host2rump(s);
388 } else {
389 rc_listen = host_listen;
390 }
391 return rc_listen(s, backlog);
392 }
393
/*
 * recv(2) replacement: forwards to this file's recvfrom() with no
 * source address buffer, so the rump/host dispatch happens there.
 */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{

	return recvfrom(s, buf, len, flags, NULL, NULL);
}
400
401 ssize_t
402 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
403 socklen_t *fromlen)
404 {
405 int (*rc_recvfrom)(int, void *, size_t, int,
406 struct sockaddr *, socklen_t *);
407
408 DPRINTF(("recvfrom\n"));
409 if (fd_isrump(s)) {
410 rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
411 s = fd_host2rump(s);
412 } else {
413 rc_recvfrom = host_recvfrom;
414 }
415
416 return rc_recvfrom(s, buf, len, flags, from, fromlen);
417 }
418
419 ssize_t
420 recvmsg(int s, struct msghdr *msg, int flags)
421 {
422 int (*rc_recvmsg)(int, struct msghdr *, int);
423
424 DPRINTF(("recvmsg\n"));
425 assertfd(s);
426 rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
427 return rc_recvmsg(fd_host2rump(s), msg, flags);
428 }
429
/*
 * send(2) replacement: forwards to this file's sendto() with no
 * destination address, so the rump/host dispatch happens there.
 */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{

	return sendto(s, buf, len, flags, NULL, 0);
}
436
437 ssize_t
438 sendto(int s, const void *buf, size_t len, int flags,
439 const struct sockaddr *to, socklen_t tolen)
440 {
441 int (*rc_sendto)(int, const void *, size_t, int,
442 const struct sockaddr *, socklen_t);
443
444 if (s == -1)
445 return len;
446 DPRINTF(("sendto\n"));
447
448 if (fd_isrump(s)) {
449 rc_sendto = rumpcalls[RUMPCALL_SENDTO];
450 s = fd_host2rump(s);
451 } else {
452 rc_sendto = host_sendto;
453 }
454 return rc_sendto(s, buf, len, flags, to, tolen);
455 }
456
457 ssize_t
458 sendmsg(int s, const struct msghdr *msg, int flags)
459 {
460 int (*rc_sendmsg)(int, const struct msghdr *, int);
461
462 DPRINTF(("sendmsg\n"));
463 assertfd(s);
464 rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
465 return rc_sendmsg(fd_host2rump(s), msg, flags);
466 }
467
468 int
469 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
470 {
471 int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
472
473 DPRINTF(("getsockopt -> %d\n", s));
474 assertfd(s);
475 rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
476 return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
477 }
478
479 int
480 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
481 {
482 int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
483
484 DPRINTF(("setsockopt -> %d\n", s));
485 if (fd_isrump(s)) {
486 rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
487 s = fd_host2rump(s);
488 } else {
489 rc_setsockopt = host_setsockopt;
490 }
491 return rc_setsockopt(s, level, optname, optval, optlen);
492 }
493
494 int
495 shutdown(int s, int how)
496 {
497 int (*rc_shutdown)(int, int);
498
499 DPRINTF(("shutdown -> %d\n", s));
500 if (fd_isrump(s)) {
501 rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
502 s = fd_host2rump(s);
503 } else {
504 rc_shutdown = host_shutdown;
505 }
506 return rc_shutdown(s, how);
507 }
508
509 /*
510 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
511 * many programs do that. dup2 of a rump kernel fd to another value
512 * not >= fdoff is an error.
513 *
514 * Note: cannot rump2host newd, because it is often hardcoded.
515 *
516 * XXX: should disable debug prints after stdout/stderr are dup2'd
517 */
518 int
519 dup2(int oldd, int newd)
520 {
521 int rv;
522
523 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
524
525 if (fd_isrump(oldd)) {
526 if (!(newd >= 0 && newd <= 2))
527 return EBADF;
528 oldd = fd_host2rump(oldd);
529 rv = rump_sys_dup2(oldd, newd);
530 if (rv != -1)
531 dup2mask |= 1<<newd;
532 } else {
533 rv = host_dup2(oldd, newd);
534 }
535
536 return rv;
537 }
538
/*
 * fork(2) replacement.  The host fork is bracketed by the rump client
 * calls rumpclient_prefork() and rumpclient_fork_init(), with the aim
 * of preserving the parent's rump file descriptors in the child
 * without both processes using the same connection fd.
 *
 * Returns what the host fork returned, except:
 *  - -1 if rumpclient_prefork() fails (no fork is attempted), and
 *  - -1 in the child if rumpclient_fork_init() fails.
 */
pid_t
fork(void)
{
	struct rumpclient_fork *rf;
	pid_t rv;

	DPRINTF(("fork\n"));

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	rv = host_fork();
	if (rv == 0) {
		/* child: set up the rump client state from rf */
		if (rumpclient_fork_init(rf) == -1)
			rv = -1;
	}
	/* XXX: rf is not cancelled when host_fork() fails (rv == -1) */

	DPRINTF(("fork returns %d\n", rv));
	return rv;
}
571
572 /*
573 * Hybrids
574 */
575
576 ssize_t
577 read(int fd, void *buf, size_t len)
578 {
579 ssize_t (*op_read)(int, void *, size_t);
580 ssize_t n;
581
582 DPRINTF(("read %d\n", fd));
583 if (fd_isrump(fd)) {
584 fd = fd_host2rump(fd);
585 op_read = rumpcalls[RUMPCALL_READ];
586 } else {
587 op_read = host_read;
588 }
589
590 n = op_read(fd, buf, len);
591 return n;
592 }
593
594 ssize_t
595 readv(int fd, const struct iovec *iov, int iovcnt)
596 {
597 ssize_t (*op_readv)(int, const struct iovec *, int);
598
599 DPRINTF(("readv %d\n", fd));
600 if (fd_isrump(fd)) {
601 fd = fd_host2rump(fd);
602 op_readv = rumpcalls[RUMPCALL_READV];
603 } else {
604 op_readv = host_readv;
605 }
606
607 return op_readv(fd, iov, iovcnt);
608 }
609
610 ssize_t
611 write(int fd, const void *buf, size_t len)
612 {
613 ssize_t (*op_write)(int, const void *, size_t);
614
615 if (fd_isrump(fd)) {
616 fd = fd_host2rump(fd);
617 op_write = rumpcalls[RUMPCALL_WRITE];
618 } else {
619 op_write = host_write;
620 }
621
622 return op_write(fd, buf, len);
623 }
624
625 ssize_t
626 writev(int fd, const struct iovec *iov, int iovcnt)
627 {
628 ssize_t (*op_writev)(int, const struct iovec *, int);
629
630 DPRINTF(("writev %d\n", fd));
631 if (fd_isrump(fd)) {
632 fd = fd_host2rump(fd);
633 op_writev = rumpcalls[RUMPCALL_WRITEV];
634 } else {
635 op_writev = host_writev;
636 }
637
638 return op_writev(fd, iov, iovcnt);
639 }
640
641 int
642 ioctl(int fd, unsigned long cmd, ...)
643 {
644 int (*op_ioctl)(int, unsigned long cmd, ...);
645 va_list ap;
646 int rv;
647
648 DPRINTF(("ioctl\n"));
649 if (fd_isrump(fd)) {
650 fd = fd_host2rump(fd);
651 op_ioctl = rumpcalls[RUMPCALL_IOCTL];
652 } else {
653 op_ioctl = host_ioctl;
654 }
655
656 va_start(ap, cmd);
657 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
658 va_end(ap);
659 return rv;
660 }
661
662 int
663 fcntl(int fd, int cmd, ...)
664 {
665 int (*op_fcntl)(int, int, ...);
666 va_list ap;
667 int rv;
668
669 DPRINTF(("fcntl\n"));
670 if (fd_isrump(fd)) {
671 fd = fd_host2rump(fd);
672 op_fcntl = rumpcalls[RUMPCALL_FCNTL];
673 } else {
674 op_fcntl = host_fcntl;
675 }
676
677 va_start(ap, cmd);
678 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
679 va_end(ap);
680 return rv;
681 }
682
683 int
684 close(int fd)
685 {
686 int (*op_close)(int);
687
688 DPRINTF(("close %d\n", fd));
689 if (fd_isrump(fd)) {
690 fd = fd_host2rump(fd);
691 op_close = rumpcalls[RUMPCALL_CLOSE];
692 } else {
693 op_close = host_close;
694 }
695
696 return op_close(fd);
697 }
698
/*
 * select(2) replacement, implemented on top of pollts().
 *
 * The three fd_sets are converted into one pollfd vector with an
 * entry per descriptor that is set in at least one of them
 * (read -> POLLIN, write -> POLLOUT, except -> POLLHUP|POLLERR),
 * pollts() is called, and the results are written back into the sets.
 *
 * Returns the pollts() result: -1 on error (the sets are left
 * untouched), 0 on timeout (the sets are cleared), otherwise the
 * number of pollfd entries that reported events.  A negative nfds
 * fails with EINVAL; a failed allocation returns -1.
 */
int
SELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t realnfds, j;
	int i, rv;

	DPRINTF(("select\n"));

	if (nfds < 0) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_open_fds.
	 * Seems to be fixed in current, thank the maker.
	 */
	realnfds = 0;
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill the vector.  An entry is touched only for a descriptor
	 * that is actually selected, so nothing is written when the
	 * vector is empty or already full.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		short events = 0;

		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = pollts(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  This is done
	 * for a timeout (rv == 0) as well, so that the caller does not
	 * see its own request bits as ready descriptors.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	free(pfds);
	return rv;
}
807
/*
 * Count how many entries of a pollfd vector refer to host descriptors
 * and how many to rump kernel descriptors.  Entries with fd == -1 are
 * not counted.  The counters are incremented, not reset.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx;
	int fd;

	for (idx = 0; idx < nfds; idx++) {
		fd = fds[idx].fd;
		if (fd != -1) {
			if (fd_isrump(fd))
				++*rumpcall;
			else
				++*hostcall;
		}
	}
}
823
/*
 * Rewrite the fd member of every entry in a pollfd vector by running
 * it through fdadj.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	nfds_t idx = 0;

	while (idx < nfds) {
		int oldfd = fds[idx].fd;

		fds[idx].fd = fdadj(oldfd);
		idx++;
	}
}
833
/*
 * poll is easy as long as all the fds in the call belong to only one
 * kernel.  otherwise it's quite tricky...
 *
 * struct pollarg carries the arguments for, and the error result of,
 * the host half of such a mixed poll; it is handed to hostpoll().
 */
struct pollarg {
	struct pollfd *pfds;		/* vector passed to host_pollts() */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout passed to host_pollts() */
	const sigset_t *sigmask;	/* signal mask passed to host_pollts() */
	int pipefd;			/* written with rump_sys_write() when the host poll returns */
	int errnum;			/* errno of host_pollts(); set only when it returned -1 */
};
846
847 static void *
848 hostpoll(void *arg)
849 {
850 struct pollarg *parg = arg;
851 intptr_t rv;
852
853 rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
854 if (rv == -1)
855 parg->errnum = errno;
856 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
857
858 return (void *)(intptr_t)rv;
859 }
860
861 int
862 POLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
863 const sigset_t *sigmask)
864 {
865 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
866 const sigset_t *);
867 int hostcall = 0, rumpcall = 0;
868 pthread_t pt;
869 nfds_t i;
870 int rv;
871
872 DPRINTF(("poll\n"));
873 checkpoll(fds, nfds, &hostcall, &rumpcall);
874
875 if (hostcall && rumpcall) {
876 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
877 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
878 struct pollarg parg;
879 uintptr_t lrv;
880 int sverrno = 0, trv;
881
882 /*
883 * ok, this is where it gets tricky. We must support
884 * this since it's a very common operation in certain
885 * types of software (telnet, netcat, etc). We allocate
886 * two vectors and run two poll commands in separate
887 * threads. Whichever returns first "wins" and the
888 * other kernel's fds won't show activity.
889 */
890 rv = -1;
891
892 /* allocate full vector for O(n) joining after call */
893 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
894 if (!pfd_host)
895 goto out;
896 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
897 if (!pfd_rump) {
898 goto out;
899 }
900
901 /* split vectors */
902 for (i = 0; i < nfds; i++) {
903 if (fds[i].fd == -1) {
904 pfd_host[i].fd = -1;
905 pfd_rump[i].fd = -1;
906 } else if (fd_isrump(fds[i].fd)) {
907 pfd_host[i].fd = -1;
908 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
909 pfd_rump[i].events = fds[i].events;
910 } else {
911 pfd_rump[i].fd = -1;
912 pfd_host[i].fd = fds[i].fd;
913 pfd_host[i].events = fds[i].events;
914 }
915 fds[i].revents = 0;
916 }
917
918 /*
919 * then, open two pipes, one for notifications
920 * to each kernel.
921 */
922 if (rump_sys_pipe(rpipe) == -1)
923 goto out;
924 if (pipe(hpipe) == -1)
925 goto out;
926
927 pfd_host[nfds].fd = hpipe[0];
928 pfd_host[nfds].events = POLLIN;
929 pfd_rump[nfds].fd = rpipe[0];
930 pfd_rump[nfds].events = POLLIN;
931
932 /*
933 * then, create a thread to do host part and meanwhile
934 * do rump kernel part right here
935 */
936
937 parg.pfds = pfd_host;
938 parg.nfds = nfds+1;
939 parg.ts = ts;
940 parg.sigmask = sigmask;
941 parg.pipefd = rpipe[1];
942 pthread_create(&pt, NULL, hostpoll, &parg);
943
944 op_pollts = rumpcalls[RUMPCALL_POLLTS];
945 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
946 sverrno = errno;
947 write(hpipe[1], &rv, sizeof(rv));
948 pthread_join(pt, (void *)&trv);
949
950 /* check who "won" and merge results */
951 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
952 rv = trv;
953
954 for (i = 0; i < nfds; i++) {
955 if (pfd_rump[i].fd != -1)
956 fds[i].revents = pfd_rump[i].revents;
957 }
958 sverrno = parg.errnum;
959 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
960 rv = trv;
961
962 for (i = 0; i < nfds; i++) {
963 if (pfd_host[i].fd != -1)
964 fds[i].revents = pfd_host[i].revents;
965 }
966 } else {
967 rv = 0;
968 }
969
970 out:
971 if (rpipe[0] != -1)
972 rump_sys_close(rpipe[0]);
973 if (rpipe[1] != -1)
974 rump_sys_close(rpipe[1]);
975 if (hpipe[0] != -1)
976 host_close(hpipe[0]);
977 if (hpipe[1] != -1)
978 host_close(hpipe[1]);
979 free(pfd_host);
980 free(pfd_rump);
981 errno = sverrno;
982 } else {
983 if (hostcall) {
984 op_pollts = host_pollts;
985 } else {
986 op_pollts = rumpcalls[RUMPCALL_POLLTS];
987 adjustpoll(fds, nfds, fd_host2rump);
988 }
989
990 rv = op_pollts(fds, nfds, ts, sigmask);
991 if (rumpcall)
992 adjustpoll(fds, nfds, fd_rump2host);
993 }
994
995 return rv;
996 }
997
998 int
999 POLL(struct pollfd *fds, nfds_t nfds, int timeout)
1000 {
1001 struct timespec ts;
1002 struct timespec *tsp = NULL;
1003
1004 if (timeout != INFTIM) {
1005 ts.tv_sec = timeout / 1000;
1006 ts.tv_nsec = (timeout % 1000) * 1000*1000;
1007
1008 tsp = &ts;
1009 }
1010
1011 return pollts(fds, nfds, tsp, NULL);
1012 }
1013
/*
 * kqueue(2) is not supported by the hijack library: calling it
 * aborts the process.
 */
int
kqueue(void)
{

	abort();
}
1020
/*
 * kevent(2) is not supported by the hijack library: calling it
 * aborts the process.  None of the arguments are examined.
 */
int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{

	abort();
}
1029