hijack.c revision 1.16.2.1 1 /* $NetBSD: hijack.c,v 1.16.2.1 2011/01/20 14:24:53 bouyer Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.16.2.1 2011/01/20 14:24:53 bouyer Exp $");
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/event.h>
34 #include <sys/ioctl.h>
35 #include <sys/socket.h>
36 #include <sys/poll.h>
37
38 #include <rump/rumpclient.h>
39 #include <rump/rump_syscalls.h>
40
41 #include <assert.h>
42 #include <dlfcn.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <poll.h>
47 #include <pthread.h>
48 #include <signal.h>
49 #include <stdarg.h>
50 #include <stdbool.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <time.h>
54 #include <unistd.h>
55
/*
 * Indices into rumpcalls[] and sysnames[].  The order of the enumerators
 * must match the order of the entries in sysnames[].
 */
enum {
	/* sockets */
	RUMPCALL_SOCKET,
	RUMPCALL_ACCEPT,
	RUMPCALL_BIND,
	RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME,
	RUMPCALL_GETSOCKNAME,
	RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM,
	RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO,
	RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT,
	RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,

	/* generic descriptor I/O */
	RUMPCALL_READ,
	RUMPCALL_READV,
	RUMPCALL_WRITE,
	RUMPCALL_WRITEV,
	RUMPCALL_IOCTL,
	RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,

	/* file systems */
	RUMPCALL_QUOTACTL,
	RUMPCALL_MOUNT,
	RUMPCALL_GETVFSSTAT,

	/* number of entries; must stay last */
	RUMPCALL__NUM
};
72
73 #define RSYS_STRING(a) __STRING(a)
74 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
75
76 const char *sysnames[] = {
77 RSYS_NAME(SOCKET),
78 RSYS_NAME(ACCEPT),
79 RSYS_NAME(BIND),
80 RSYS_NAME(CONNECT),
81 RSYS_NAME(GETPEERNAME),
82 RSYS_NAME(GETSOCKNAME),
83 RSYS_NAME(LISTEN),
84 RSYS_NAME(RECVFROM),
85 RSYS_NAME(RECVMSG),
86 RSYS_NAME(SENDTO),
87 RSYS_NAME(SENDMSG),
88 RSYS_NAME(GETSOCKOPT),
89 RSYS_NAME(SETSOCKOPT),
90 RSYS_NAME(SHUTDOWN),
91 RSYS_NAME(READ),
92 RSYS_NAME(READV),
93 RSYS_NAME(WRITE),
94 RSYS_NAME(WRITEV),
95 RSYS_NAME(IOCTL),
96 RSYS_NAME(FCNTL),
97 RSYS_NAME(CLOSE),
98 RSYS_NAME(POLLTS),
99 RSYS_NAME(QUOTACTL),
100 RSYS_NAME(MOUNT),
101 RSYS_NAME(GETVFSSTAT),
102 };
103
104 static int (*host_socket)(int, int, int);
105 static int (*host_connect)(int, const struct sockaddr *, socklen_t);
106 static int (*host_bind)(int, const struct sockaddr *, socklen_t);
107 static int (*host_listen)(int, int);
108 static int (*host_accept)(int, struct sockaddr *, socklen_t *);
109 static int (*host_getpeername)(int, struct sockaddr *, socklen_t *);
110 static int (*host_getsockname)(int, struct sockaddr *, socklen_t *);
111 static int (*host_setsockopt)(int, int, int, const void *, socklen_t);
112
113 static ssize_t (*host_read)(int, void *, size_t);
114 static ssize_t (*host_readv)(int, const struct iovec *, int);
115 static ssize_t (*host_write)(int, const void *, size_t);
116 static ssize_t (*host_writev)(int, const struct iovec *, int);
117 static int (*host_ioctl)(int, unsigned long, ...);
118 static int (*host_fcntl)(int, int, ...);
119 static int (*host_close)(int);
120 static int (*host_pollts)(struct pollfd *, nfds_t,
121 const struct timespec *, const sigset_t *);
122 static pid_t (*host_fork)(void);
123 static int (*host_dup2)(int, int);
124 static int (*host_shutdown)(int, int);
125 /* XXX */
126 static void *host_sendto;
127 static void *host_recvfrom;
128
129 static void *rumpcalls[RUMPCALL__NUM];
130
131 /*
132 * Would be nice to get this automatically in sync with libc.
133 * Also, this does not work for compat-using binaries!
134 */
135
136 #if !__NetBSD_Prereq__(5,99,7)
137 #define SELECT select
138 #define POLLTS pollts
139 #define POLL poll
140 #else
141 #define SELECT __select50
142 #define POLLTS __pollts50
143 #define POLL __poll50
144
145 int SELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
146 int POLLTS(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *);
147 int POLL(struct pollfd *, nfds_t, int);
148 #endif
149
/*
 * Thin dlsym() wrapper handed to librumpclient through its
 * "rumpclient_dlsym" hook (see rcinit()).  It is called from
 * librumpclient in case of LD_PRELOAD; because the dlsym() call is made
 * from this object, RTLD_NEXT is resolved correctly.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
160
161 /* low calorie sockets? */
162 static bool hostlocalsockets = true;
163
164 static void __attribute__((constructor))
165 rcinit(void)
166 {
167 int (*rumpcinit)(void);
168 void **rumpcdlsym;
169 void *hand;
170 int i;
171
172 hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
173 if (!hand)
174 err(1, "cannot open librumpclient.so");
175 rumpcinit = dlsym(hand, "rumpclient_init");
176 _DIAGASSERT(rumpcinit);
177
178 rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
179 *rumpcdlsym = hijackdlsym;
180
181 host_socket = dlsym(RTLD_NEXT, "__socket30");
182 host_listen = dlsym(RTLD_NEXT, "listen");
183 host_connect = dlsym(RTLD_NEXT, "connect");
184 host_bind = dlsym(RTLD_NEXT, "bind");
185 host_accept = dlsym(RTLD_NEXT, "accept");
186 host_getpeername = dlsym(RTLD_NEXT, "getpeername");
187 host_getsockname = dlsym(RTLD_NEXT, "getsockname");
188 host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
189
190 host_read = dlsym(RTLD_NEXT, "read");
191 host_readv = dlsym(RTLD_NEXT, "readv");
192 host_write = dlsym(RTLD_NEXT, "write");
193 host_writev = dlsym(RTLD_NEXT, "writev");
194 host_ioctl = dlsym(RTLD_NEXT, "ioctl");
195 host_fcntl = dlsym(RTLD_NEXT, "fcntl");
196 host_close = dlsym(RTLD_NEXT, "close");
197 host_pollts = dlsym(RTLD_NEXT, "pollts");
198 host_fork = dlsym(RTLD_NEXT, "fork");
199 host_dup2 = dlsym(RTLD_NEXT, "dup2");
200 host_shutdown = dlsym(RTLD_NEXT, "shutdown");
201 host_sendto = dlsym(RTLD_NEXT, "sendto");
202 host_recvfrom = dlsym(RTLD_NEXT, "recvfrom");
203
204 for (i = 0; i < RUMPCALL__NUM; i++) {
205 rumpcalls[i] = dlsym(hand, sysnames[i]);
206 if (!rumpcalls[i]) {
207 fprintf(stderr, "rumphijack: cannot find symbol: %s\n",
208 sysnames[i]);
209 exit(1);
210 }
211 }
212
213 if (rumpcinit() == -1)
214 err(1, "rumpclient init");
215 }
216
/*
 * Bitmask of low-numbered descriptors that dup2() has pointed at a rump
 * kernel descriptor: bit n set means fd n refers to the rump kernel.
 */
static unsigned dup2mask;

/*
 * Return true if fd has its bit set in dup2mask.  Descriptors that
 * cannot be represented in the mask (negative, or >= the number of bits
 * in it) are never dup2'd; checking the range first also keeps the shift
 * well-defined.
 */
static inline bool
isdup2d(int fd)
{

	if (fd < 0 || fd >= (int)(sizeof(dup2mask) * 8))
		return false;
	return ((1u << fd) & dup2mask) != 0;
}
#define ISDUP2D(fd) isdup2d(fd)
219
/*
 * Debug tracing.  Uncomment the define below to have DPRINTF((fmt, ...))
 * print to stderr; otherwise DPRINTF expands to nothing.  Note the
 * double parentheses at the call sites.
 */
//#define DEBUGJACK
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
/*
 * printf-style trace output to stderr.  Output is suppressed once
 * stderr has been dup2'd.
 */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#endif
239
/*
 * Rump kernel descriptors are exposed to the application shifted up by
 * HIJACK_FDOFF.
 * XXX: need runtime selection. low for now due to FD_SETSIZE
 */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */

/*
 * Convert a descriptor number returned by the rump kernel into the
 * number handed to the application.  -1 (error) and dup2'd descriptors
 * pass through unchanged; everything else is shifted by HIJACK_FDOFF.
 */
static int
fd_rump2host(int fd)
{

	if (fd != -1 && !ISDUP2D(fd))
		fd += HIJACK_FDOFF;

	return fd;
}
256
257 static int
258 fd_host2rump(int fd)
259 {
260
261 if (!ISDUP2D(fd))
262 fd -= HIJACK_FDOFF;
263 return fd;
264 }
265
266 static bool
267 fd_isrump(int fd)
268 {
269
270 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
271 }
272
273 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
274 #undef HIJACK_FDOFF
275
276 int
277 __quotactl50(const char * mnt, struct plistref *p)
278 {
279 int (*qctl)(const char *, struct plistref *);
280 int error;
281
282 qctl = rumpcalls[RUMPCALL_QUOTACTL];
283 error = qctl(mnt, p);
284 DPRINTF(("quotactl <- %d\n", error));
285 return error;
286 }
287
288 int
289 __mount50(const char *type, const char *dir, int flags, void *data,
290 size_t data_len)
291 {
292 int (*domount)(const char *, const char *, int, void *, size_t);
293 int error;
294
295 domount = rumpcalls[RUMPCALL_MOUNT];
296 error = domount(type, dir, flags, data, data_len);
297 DPRINTF(("mount <- %d\n", error));
298 return error;
299 }
300
301 int
302 getvfsstat(struct statvfs *buf, size_t bufsize, int flags)
303 {
304 int (*dogetvfsstat)(struct statvfs *, size_t, int);
305 int error;
306
307 dogetvfsstat = rumpcalls[RUMPCALL_GETVFSSTAT];
308 error = dogetvfsstat(buf, bufsize, flags);
309 DPRINTF(("getvfsstat <- %d\n", error));
310 return error;
311 }
312
313 int __socket30(int, int, int);
314 int
315 __socket30(int domain, int type, int protocol)
316 {
317 int (*rc_socket)(int, int, int);
318 int fd;
319 bool dohost;
320
321 dohost = hostlocalsockets && (domain == AF_LOCAL);
322
323 if (dohost)
324 rc_socket = host_socket;
325 else
326 rc_socket = rumpcalls[RUMPCALL_SOCKET];
327 fd = rc_socket(domain, type, protocol);
328
329 if (!dohost)
330 fd = fd_rump2host(fd);
331 DPRINTF(("socket <- %d\n", fd));
332
333 return fd;
334 }
335
336 int
337 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
338 {
339 int (*rc_accept)(int, struct sockaddr *, socklen_t *);
340 int fd;
341 bool isrump;
342
343 isrump = fd_isrump(s);
344
345 DPRINTF(("accept -> %d", s));
346 if (isrump) {
347 rc_accept = rumpcalls[RUMPCALL_ACCEPT];
348 s = fd_host2rump(s);
349 } else {
350 rc_accept = host_accept;
351 }
352 fd = rc_accept(s, addr, addrlen);
353 if (fd != -1 && isrump)
354 fd = fd_rump2host(fd);
355
356 DPRINTF((" <- %d\n", fd));
357
358 return fd;
359 }
360
361 int
362 bind(int s, const struct sockaddr *name, socklen_t namelen)
363 {
364 int (*rc_bind)(int, const struct sockaddr *, socklen_t);
365
366 DPRINTF(("bind -> %d\n", s));
367 if (fd_isrump(s)) {
368 rc_bind = rumpcalls[RUMPCALL_BIND];
369 s = fd_host2rump(s);
370 } else {
371 rc_bind = host_bind;
372 }
373 return rc_bind(s, name, namelen);
374 }
375
376 int
377 connect(int s, const struct sockaddr *name, socklen_t namelen)
378 {
379 int (*rc_connect)(int, const struct sockaddr *, socklen_t);
380
381 DPRINTF(("connect -> %d\n", s));
382 if (fd_isrump(s)) {
383 rc_connect = rumpcalls[RUMPCALL_CONNECT];
384 s = fd_host2rump(s);
385 } else {
386 rc_connect = host_connect;
387 }
388
389 return rc_connect(s, name, namelen);
390 }
391
392 int
393 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
394 {
395 int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
396
397 DPRINTF(("getpeername -> %d\n", s));
398 if (fd_isrump(s)) {
399 rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
400 s = fd_host2rump(s);
401 } else {
402 rc_getpeername = host_getpeername;
403 }
404 return rc_getpeername(s, name, namelen);
405 }
406
407 int
408 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
409 {
410 int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
411
412 DPRINTF(("getsockname -> %d\n", s));
413 if (fd_isrump(s)) {
414 rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
415 s = fd_host2rump(s);
416 } else {
417 rc_getsockname = host_getsockname;
418 }
419 return rc_getsockname(s, name, namelen);
420 }
421
422 int
423 listen(int s, int backlog)
424 {
425 int (*rc_listen)(int, int);
426
427 DPRINTF(("listen -> %d\n", s));
428 if (fd_isrump(s)) {
429 rc_listen = rumpcalls[RUMPCALL_LISTEN];
430 s = fd_host2rump(s);
431 } else {
432 rc_listen = host_listen;
433 }
434 return rc_listen(s, backlog);
435 }
436
/*
 * recv is implemented as recvfrom() without a source address buffer.
 */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{
	ssize_t nrecv;

	nrecv = recvfrom(s, buf, len, flags, NULL, NULL);
	return nrecv;
}
443
444 ssize_t
445 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
446 socklen_t *fromlen)
447 {
448 int (*rc_recvfrom)(int, void *, size_t, int,
449 struct sockaddr *, socklen_t *);
450
451 DPRINTF(("recvfrom\n"));
452 if (fd_isrump(s)) {
453 rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
454 s = fd_host2rump(s);
455 } else {
456 rc_recvfrom = host_recvfrom;
457 }
458
459 return rc_recvfrom(s, buf, len, flags, from, fromlen);
460 }
461
462 ssize_t
463 recvmsg(int s, struct msghdr *msg, int flags)
464 {
465 int (*rc_recvmsg)(int, struct msghdr *, int);
466
467 DPRINTF(("recvmsg\n"));
468 assertfd(s);
469 rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
470 return rc_recvmsg(fd_host2rump(s), msg, flags);
471 }
472
/*
 * send is implemented as sendto() without a destination address.
 */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{
	ssize_t nsent;

	nsent = sendto(s, buf, len, flags, NULL, 0);
	return nsent;
}
479
480 ssize_t
481 sendto(int s, const void *buf, size_t len, int flags,
482 const struct sockaddr *to, socklen_t tolen)
483 {
484 int (*rc_sendto)(int, const void *, size_t, int,
485 const struct sockaddr *, socklen_t);
486
487 if (s == -1)
488 return len;
489 DPRINTF(("sendto\n"));
490
491 if (fd_isrump(s)) {
492 rc_sendto = rumpcalls[RUMPCALL_SENDTO];
493 s = fd_host2rump(s);
494 } else {
495 rc_sendto = host_sendto;
496 }
497 return rc_sendto(s, buf, len, flags, to, tolen);
498 }
499
500 ssize_t
501 sendmsg(int s, const struct msghdr *msg, int flags)
502 {
503 int (*rc_sendmsg)(int, const struct msghdr *, int);
504
505 DPRINTF(("sendmsg\n"));
506 assertfd(s);
507 rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
508 return rc_sendmsg(fd_host2rump(s), msg, flags);
509 }
510
511 int
512 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
513 {
514 int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
515
516 DPRINTF(("getsockopt -> %d\n", s));
517 assertfd(s);
518 rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
519 return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
520 }
521
522 int
523 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
524 {
525 int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
526
527 DPRINTF(("setsockopt -> %d\n", s));
528 if (fd_isrump(s)) {
529 rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
530 s = fd_host2rump(s);
531 } else {
532 rc_setsockopt = host_setsockopt;
533 }
534 return rc_setsockopt(s, level, optname, optval, optlen);
535 }
536
537 int
538 shutdown(int s, int how)
539 {
540 int (*rc_shutdown)(int, int);
541
542 DPRINTF(("shutdown -> %d\n", s));
543 if (fd_isrump(s)) {
544 rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
545 s = fd_host2rump(s);
546 } else {
547 rc_shutdown = host_shutdown;
548 }
549 return rc_shutdown(s, how);
550 }
551
552 /*
553 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
554 * many programs do that. dup2 of a rump kernel fd to another value
555 * not >= fdoff is an error.
556 *
557 * Note: cannot rump2host newd, because it is often hardcoded.
558 *
559 * XXX: should disable debug prints after stdout/stderr are dup2'd
560 */
561 int
562 dup2(int oldd, int newd)
563 {
564 int rv;
565
566 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
567
568 if (fd_isrump(oldd)) {
569 if (!(newd >= 0 && newd <= 2))
570 return EBADF;
571 oldd = fd_host2rump(oldd);
572 rv = rump_sys_dup2(oldd, newd);
573 if (rv != -1)
574 dup2mask |= 1<<newd;
575 } else {
576 rv = host_dup2(oldd, newd);
577 }
578
579 return rv;
580 }
581
/*
 * fork wrapper.  The host fork is bracketed by the rump client calls
 * rumpclient_prefork() (before, in the parent) and
 * rumpclient_fork_init() (after, in the child) to preserve the file
 * descriptors of the forked parent in the child, but prevent double
 * use of the connection fd.
 *
 * Returns -1 if the prefork step fails, otherwise the host fork result;
 * in the child, -1 is returned instead of 0 if rumpclient_fork_init()
 * fails.
 */

pid_t
fork()
{
	struct rumpclient_fork *forkstate;
	pid_t pid;

	DPRINTF(("fork\n"));

	forkstate = rumpclient_prefork();
	if (forkstate == NULL)
		return -1;

	pid = host_fork();
	/* XXX: forkstate is not cancelled if host_fork() failed (pid == -1) */
	if (pid == 0 && rumpclient_fork_init(forkstate) == -1)
		pid = -1;

	DPRINTF(("fork returns %d\n", pid));
	return pid;
}
614
615 /*
616 * Hybrids
617 */
618
619 ssize_t
620 read(int fd, void *buf, size_t len)
621 {
622 ssize_t (*op_read)(int, void *, size_t);
623 ssize_t n;
624
625 DPRINTF(("read %d\n", fd));
626 if (fd_isrump(fd)) {
627 fd = fd_host2rump(fd);
628 op_read = rumpcalls[RUMPCALL_READ];
629 } else {
630 op_read = host_read;
631 }
632
633 n = op_read(fd, buf, len);
634 return n;
635 }
636
637 ssize_t
638 readv(int fd, const struct iovec *iov, int iovcnt)
639 {
640 ssize_t (*op_readv)(int, const struct iovec *, int);
641
642 DPRINTF(("readv %d\n", fd));
643 if (fd_isrump(fd)) {
644 fd = fd_host2rump(fd);
645 op_readv = rumpcalls[RUMPCALL_READV];
646 } else {
647 op_readv = host_readv;
648 }
649
650 return op_readv(fd, iov, iovcnt);
651 }
652
653 ssize_t
654 write(int fd, const void *buf, size_t len)
655 {
656 ssize_t (*op_write)(int, const void *, size_t);
657
658 if (fd_isrump(fd)) {
659 fd = fd_host2rump(fd);
660 op_write = rumpcalls[RUMPCALL_WRITE];
661 } else {
662 op_write = host_write;
663 }
664
665 return op_write(fd, buf, len);
666 }
667
668 ssize_t
669 writev(int fd, const struct iovec *iov, int iovcnt)
670 {
671 ssize_t (*op_writev)(int, const struct iovec *, int);
672
673 DPRINTF(("writev %d\n", fd));
674 if (fd_isrump(fd)) {
675 fd = fd_host2rump(fd);
676 op_writev = rumpcalls[RUMPCALL_WRITEV];
677 } else {
678 op_writev = host_writev;
679 }
680
681 return op_writev(fd, iov, iovcnt);
682 }
683
684 int
685 ioctl(int fd, unsigned long cmd, ...)
686 {
687 int (*op_ioctl)(int, unsigned long cmd, ...);
688 va_list ap;
689 int rv;
690
691 DPRINTF(("ioctl\n"));
692 if (fd_isrump(fd)) {
693 fd = fd_host2rump(fd);
694 op_ioctl = rumpcalls[RUMPCALL_IOCTL];
695 } else {
696 op_ioctl = host_ioctl;
697 }
698
699 va_start(ap, cmd);
700 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
701 va_end(ap);
702 return rv;
703 }
704
705 int
706 fcntl(int fd, int cmd, ...)
707 {
708 int (*op_fcntl)(int, int, ...);
709 va_list ap;
710 int rv;
711
712 DPRINTF(("fcntl\n"));
713 if (fd_isrump(fd)) {
714 fd = fd_host2rump(fd);
715 op_fcntl = rumpcalls[RUMPCALL_FCNTL];
716 } else {
717 op_fcntl = host_fcntl;
718 }
719
720 va_start(ap, cmd);
721 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
722 va_end(ap);
723 return rv;
724 }
725
726 int
727 close(int fd)
728 {
729 int (*op_close)(int);
730
731 DPRINTF(("close %d\n", fd));
732 if (fd_isrump(fd)) {
733 fd = fd_host2rump(fd);
734 op_close = rumpcalls[RUMPCALL_CLOSE];
735 } else {
736 op_close = host_close;
737 }
738
739 return op_close(fd);
740 }
741
/*
 * Translate a select() caller's interest in descriptor fd into poll()
 * event bits.  Returns 0 if fd is in none of the (non-NULL) sets.
 */
static short
select_pollevents(int fd, fd_set *readfds, fd_set *writefds, fd_set *exceptfds)
{
	short events = 0;

	if (readfds && FD_ISSET(fd, readfds))
		events |= POLLIN;
	if (writefds && FD_ISSET(fd, writefds))
		events |= POLLOUT;
	if (exceptfds && FD_ISSET(fd, exceptfds))
		events |= POLLHUP|POLLERR;

	return events;
}

/*
 * select wrapper, implemented on top of pollts(): the descriptor sets
 * are converted into a pollfd vector, pollts() is called, and the
 * results are converted back into the sets.
 *
 * Returns the pollts() result.  On error (-1) the sets are left as the
 * caller passed them; on timeout (0) and success the sets are rewritten
 * to contain only the ready descriptors.  A negative nfds fails with
 * EINVAL.
 */
int
SELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t realnfds = 0, j;
	short events;
	int i, rv, sverrno;

	DPRINTF(("select\n"));

	if (nfds < 0) {
		errno = EINVAL;
		return -1;
	}

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_open_fds.
	 * Seems to be fixed in current.
	 */
	for (i = 0; i < nfds; i++) {
		if (select_pollevents(i, readfds, writefds, exceptfds) != 0)
			realnfds++;
	}

	if (realnfds) {
		pfds = malloc(sizeof(*pfds) * realnfds);
		if (!pfds)
			return -1;
	}

	/*
	 * Fill the vector.  An entry is written only for descriptors
	 * that are in some set, so the vector is never touched when it
	 * is empty and never written past its last element.
	 */
	for (i = 0, j = 0; i < nfds; i++) {
		events = select_pollevents(i, readfds, writefds, exceptfds);
		if (events == 0)
			continue;
		pfds[j].fd = i;
		pfds[j].events = events;
		pfds[j].revents = 0;
		j++;
	}

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = pollts(pfds, realnfds, tsp, NULL);
	if (rv < 0)
		goto out;

	/*
	 * ok, harvest results.  first zero out entries (can't use
	 * FD_ZERO for the obvious select-me-not reason).  This is done
	 * for a timeout as well, so that no descriptor is reported ready.
	 */
	for (i = 0; i < nfds; i++) {
		if (readfds)
			FD_CLR(i, readfds);
		if (writefds)
			FD_CLR(i, writefds);
		if (exceptfds)
			FD_CLR(i, exceptfds);
	}

	/* and then plug in the results */
	for (j = 0; j < realnfds; j++) {
		if (readfds && (pfds[j].revents & POLLIN))
			FD_SET(pfds[j].fd, readfds);
		if (writefds && (pfds[j].revents & POLLOUT))
			FD_SET(pfds[j].fd, writefds);
		if (exceptfds && (pfds[j].revents & (POLLHUP|POLLERR)))
			FD_SET(pfds[j].fd, exceptfds);
	}

 out:
	/* keep the errno of a failed pollts() intact across free() */
	sverrno = errno;
	free(pfds);
	errno = sverrno;
	return rv;
}
850
/*
 * Classify the descriptors of a poll vector: *rumpcall is incremented
 * for every rump kernel descriptor, *hostcall for every other one.
 * Entries with fd == -1 are skipped.  The counters are not reset here.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx;
	int fd;

	for (idx = 0; idx < nfds; idx++) {
		fd = fds[idx].fd;
		if (fd == -1)
			continue;

		*(fd_isrump(fd) ? rumpcall : hostcall) += 1;
	}
}
866
/*
 * Rewrite the fd member of every entry in a poll vector, in place, by
 * passing it through the translation function fdadj.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	struct pollfd *pfd;
	nfds_t left;

	for (pfd = fds, left = nfds; left > 0; left--, pfd++)
		pfd->fd = fdadj(pfd->fd);
}
876
877 /*
878 * poll is easy as long as the call comes in the fds only in one
879 * kernel. otherwise its quite tricky...
880 */
881 struct pollarg {
882 struct pollfd *pfds;
883 nfds_t nfds;
884 const struct timespec *ts;
885 const sigset_t *sigmask;
886 int pipefd;
887 int errnum;
888 };
889
890 static void *
891 hostpoll(void *arg)
892 {
893 struct pollarg *parg = arg;
894 intptr_t rv;
895
896 rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
897 if (rv == -1)
898 parg->errnum = errno;
899 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
900
901 return (void *)(intptr_t)rv;
902 }
903
904 int
905 POLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
906 const sigset_t *sigmask)
907 {
908 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
909 const sigset_t *);
910 int hostcall = 0, rumpcall = 0;
911 pthread_t pt;
912 nfds_t i;
913 int rv;
914
915 DPRINTF(("poll\n"));
916 checkpoll(fds, nfds, &hostcall, &rumpcall);
917
918 if (hostcall && rumpcall) {
919 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
920 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
921 struct pollarg parg;
922 uintptr_t lrv;
923 int sverrno = 0, trv;
924
925 /*
926 * ok, this is where it gets tricky. We must support
927 * this since it's a very common operation in certain
928 * types of software (telnet, netcat, etc). We allocate
929 * two vectors and run two poll commands in separate
930 * threads. Whichever returns first "wins" and the
931 * other kernel's fds won't show activity.
932 */
933 rv = -1;
934
935 /* allocate full vector for O(n) joining after call */
936 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
937 if (!pfd_host)
938 goto out;
939 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
940 if (!pfd_rump) {
941 goto out;
942 }
943
944 /* split vectors */
945 for (i = 0; i < nfds; i++) {
946 if (fds[i].fd == -1) {
947 pfd_host[i].fd = -1;
948 pfd_rump[i].fd = -1;
949 } else if (fd_isrump(fds[i].fd)) {
950 pfd_host[i].fd = -1;
951 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
952 pfd_rump[i].events = fds[i].events;
953 } else {
954 pfd_rump[i].fd = -1;
955 pfd_host[i].fd = fds[i].fd;
956 pfd_host[i].events = fds[i].events;
957 }
958 fds[i].revents = 0;
959 }
960
961 /*
962 * then, open two pipes, one for notifications
963 * to each kernel.
964 */
965 if (rump_sys_pipe(rpipe) == -1)
966 goto out;
967 if (pipe(hpipe) == -1)
968 goto out;
969
970 pfd_host[nfds].fd = hpipe[0];
971 pfd_host[nfds].events = POLLIN;
972 pfd_rump[nfds].fd = rpipe[0];
973 pfd_rump[nfds].events = POLLIN;
974
975 /*
976 * then, create a thread to do host part and meanwhile
977 * do rump kernel part right here
978 */
979
980 parg.pfds = pfd_host;
981 parg.nfds = nfds+1;
982 parg.ts = ts;
983 parg.sigmask = sigmask;
984 parg.pipefd = rpipe[1];
985 pthread_create(&pt, NULL, hostpoll, &parg);
986
987 op_pollts = rumpcalls[RUMPCALL_POLLTS];
988 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
989 sverrno = errno;
990 write(hpipe[1], &rv, sizeof(rv));
991 pthread_join(pt, (void *)&trv);
992
993 /* check who "won" and merge results */
994 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
995 rv = trv;
996
997 for (i = 0; i < nfds; i++) {
998 if (pfd_rump[i].fd != -1)
999 fds[i].revents = pfd_rump[i].revents;
1000 }
1001 sverrno = parg.errnum;
1002 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
1003 rv = trv;
1004
1005 for (i = 0; i < nfds; i++) {
1006 if (pfd_host[i].fd != -1)
1007 fds[i].revents = pfd_host[i].revents;
1008 }
1009 } else {
1010 rv = 0;
1011 }
1012
1013 out:
1014 if (rpipe[0] != -1)
1015 rump_sys_close(rpipe[0]);
1016 if (rpipe[1] != -1)
1017 rump_sys_close(rpipe[1]);
1018 if (hpipe[0] != -1)
1019 host_close(hpipe[0]);
1020 if (hpipe[1] != -1)
1021 host_close(hpipe[1]);
1022 free(pfd_host);
1023 free(pfd_rump);
1024 errno = sverrno;
1025 } else {
1026 if (hostcall) {
1027 op_pollts = host_pollts;
1028 } else {
1029 op_pollts = rumpcalls[RUMPCALL_POLLTS];
1030 adjustpoll(fds, nfds, fd_host2rump);
1031 }
1032
1033 rv = op_pollts(fds, nfds, ts, sigmask);
1034 if (rumpcall)
1035 adjustpoll(fds, nfds, fd_rump2host);
1036 }
1037
1038 return rv;
1039 }
1040
1041 int
1042 POLL(struct pollfd *fds, nfds_t nfds, int timeout)
1043 {
1044 struct timespec ts;
1045 struct timespec *tsp = NULL;
1046
1047 if (timeout != INFTIM) {
1048 ts.tv_sec = timeout / 1000;
1049 ts.tv_nsec = (timeout % 1000) * 1000*1000;
1050
1051 tsp = &ts;
1052 }
1053
1054 return pollts(fds, nfds, tsp, NULL);
1055 }
1056
/*
 * Disabled placeholders: kqueue/kevent are not wrapped.  If enabled,
 * both stubs simply abort().
 */
#if 0
int
kqueue(void)
{

	abort();
}

int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{

	abort();
}
#endif
1074