hijack.c revision 1.16.2.3 1 1.16.2.3 bouyer /* $NetBSD: hijack.c,v 1.16.2.3 2011/02/17 11:59:23 bouyer Exp $ */
2 1.1 pooka
3 1.1 pooka /*-
4 1.1 pooka * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 1.1 pooka *
6 1.1 pooka * Redistribution and use in source and binary forms, with or without
7 1.1 pooka * modification, are permitted provided that the following conditions
8 1.1 pooka * are met:
9 1.1 pooka * 1. Redistributions of source code must retain the above copyright
10 1.1 pooka * notice, this list of conditions and the following disclaimer.
11 1.1 pooka * 2. Redistributions in binary form must reproduce the above copyright
12 1.1 pooka * notice, this list of conditions and the following disclaimer in the
13 1.1 pooka * documentation and/or other materials provided with the distribution.
14 1.1 pooka *
15 1.1 pooka * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 1.1 pooka * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 1.1 pooka * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 1.1 pooka * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 1.1 pooka * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 1.1 pooka * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 1.1 pooka * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 1.1 pooka * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 1.1 pooka * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 1.1 pooka * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 1.1 pooka * SUCH DAMAGE.
26 1.1 pooka */
27 1.1 pooka
28 1.1 pooka #include <sys/cdefs.h>
29 1.16.2.3 bouyer __RCSID("$NetBSD: hijack.c,v 1.16.2.3 2011/02/17 11:59:23 bouyer Exp $");
30 1.16.2.3 bouyer
31 1.16.2.2 bouyer #define __ssp_weak_name(fun) _hijack_ ## fun
32 1.1 pooka
33 1.1 pooka #include <sys/param.h>
34 1.1 pooka #include <sys/types.h>
35 1.10 pooka #include <sys/event.h>
36 1.1 pooka #include <sys/ioctl.h>
37 1.1 pooka #include <sys/socket.h>
38 1.1 pooka #include <sys/poll.h>
39 1.1 pooka
40 1.1 pooka #include <rump/rumpclient.h>
41 1.1 pooka #include <rump/rump_syscalls.h>
42 1.1 pooka
43 1.1 pooka #include <assert.h>
44 1.1 pooka #include <dlfcn.h>
45 1.1 pooka #include <err.h>
46 1.1 pooka #include <errno.h>
47 1.1 pooka #include <fcntl.h>
48 1.1 pooka #include <poll.h>
49 1.1 pooka #include <pthread.h>
50 1.3 pooka #include <signal.h>
51 1.1 pooka #include <stdarg.h>
52 1.8 pooka #include <stdbool.h>
53 1.1 pooka #include <stdio.h>
54 1.1 pooka #include <stdlib.h>
55 1.16.2.2 bouyer #include <string.h>
56 1.3 pooka #include <time.h>
57 1.1 pooka #include <unistd.h>
58 1.1 pooka
/*
 * One identifier for every system call that is looked up both in the
 * host and in the rump kernel.  The values index the syscalls[] table;
 * DUALCALL__NUM is the number of entries and must stay last.
 */
enum dualcall {
	/* generic output and descriptor control */
	DUALCALL_WRITE,
	DUALCALL_WRITEV,
	DUALCALL_IOCTL,
	DUALCALL_FCNTL,

	/* sockets */
	DUALCALL_SOCKET,
	DUALCALL_ACCEPT,
	DUALCALL_BIND,
	DUALCALL_CONNECT,
	DUALCALL_GETPEERNAME,
	DUALCALL_GETSOCKNAME,
	DUALCALL_LISTEN,
	DUALCALL_RECVFROM,
	DUALCALL_RECVMSG,
	DUALCALL_SENDTO,
	DUALCALL_SENDMSG,
	DUALCALL_GETSOCKOPT,
	DUALCALL_SETSOCKOPT,
	DUALCALL_SHUTDOWN,

	/* generic input */
	DUALCALL_READ,
	DUALCALL_READV,

	/* descriptor lifetime */
	DUALCALL_DUP2,
	DUALCALL_CLOSE,

	/* event waiting */
	DUALCALL_POLLTS,
	DUALCALL_KEVENT,

	DUALCALL__NUM
};
75 1.1 pooka
/*
 * RSYS_NAME(FOO) produces, as a string literal, whatever the macro
 * RUMP_SYS_RENAME_FOO expands to.  The RSYS_STRING indirection makes
 * sure the pasted token is macro-expanded before it is stringified
 * (this relies on the usual <sys/cdefs.h> __STRING/__CONCAT semantics).
 */
#define	RSYS_STRING(a)	__STRING(a)
#define	RSYS_NAME(a)	RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
78 1.8 pooka
79 1.1 pooka /*
80 1.14 pooka * Would be nice to get this automatically in sync with libc.
81 1.14 pooka * Also, this does not work for compat-using binaries!
82 1.14 pooka */
83 1.14 pooka #if !__NetBSD_Prereq__(5,99,7)
84 1.16.2.2 bouyer #define REALSELECT select
85 1.16.2.2 bouyer #define REALPOLLTS pollts
86 1.16.2.2 bouyer #define REALKEVENT kevent
87 1.14 pooka #else
88 1.16.2.2 bouyer #define REALSELECT _sys___select50
89 1.16.2.2 bouyer #define REALPOLLTS _sys___pollts50
90 1.16.2.2 bouyer #define REALKEVENT _sys___kevent50
91 1.14 pooka #endif
92 1.16.2.2 bouyer #define REALREAD _sys_read
93 1.14 pooka
94 1.16.2.2 bouyer int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
95 1.16.2.2 bouyer int REALPOLLTS(struct pollfd *, nfds_t,
96 1.16.2.2 bouyer const struct timespec *, const sigset_t *);
97 1.16.2.2 bouyer int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t,
98 1.16.2.2 bouyer const struct timespec *);
99 1.16.2.2 bouyer ssize_t REALREAD(int, void *, size_t);
100 1.16.2.2 bouyer
101 1.16.2.2 bouyer #define S(a) __STRING(a)
102 1.16.2.2 bouyer struct sysnames {
103 1.16.2.2 bouyer enum dualcall scm_callnum;
104 1.16.2.2 bouyer const char *scm_hostname;
105 1.16.2.2 bouyer const char *scm_rumpname;
106 1.16.2.2 bouyer } syscnames[] = {
107 1.16.2.2 bouyer { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) },
108 1.16.2.2 bouyer { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) },
109 1.16.2.2 bouyer { DUALCALL_BIND, "bind", RSYS_NAME(BIND) },
110 1.16.2.2 bouyer { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) },
111 1.16.2.2 bouyer { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) },
112 1.16.2.2 bouyer { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) },
113 1.16.2.2 bouyer { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) },
114 1.16.2.2 bouyer { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) },
115 1.16.2.2 bouyer { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) },
116 1.16.2.2 bouyer { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) },
117 1.16.2.2 bouyer { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) },
118 1.16.2.2 bouyer { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) },
119 1.16.2.2 bouyer { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) },
120 1.16.2.2 bouyer { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) },
121 1.16.2.2 bouyer { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) },
122 1.16.2.2 bouyer { DUALCALL_READV, "readv", RSYS_NAME(READV) },
123 1.16.2.2 bouyer { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) },
124 1.16.2.2 bouyer { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) },
125 1.16.2.2 bouyer { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) },
126 1.16.2.2 bouyer { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) },
127 1.16.2.2 bouyer { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) },
128 1.16.2.2 bouyer { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) },
129 1.16.2.2 bouyer { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) },
130 1.16.2.2 bouyer { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) },
131 1.16.2.2 bouyer };
132 1.16.2.2 bouyer #undef S
133 1.7 pooka
/*
 * Resolved entry points for each dual call, indexed by enum dualcall.
 * Both pointers are filled in by rcinit().
 */
struct bothsys {
	void *bs_host;		/* host implementation */
	void *bs_rump;		/* rump kernel implementation */
};
struct bothsys syscalls[DUALCALL__NUM];

/* GETSYSCALL(host, READ) -> syscalls[DUALCALL_READ].bs_host, etc. */
#define	GETSYSCALL(which, name)	syscalls[DUALCALL_##name].bs_##which
139 1.1 pooka
/* Host implementations of fork/daemon/execve; looked up in rcinit(). */
pid_t	(*host_fork)(void);
int	(*host_daemon)(int, int);
int	(*host_execve)(const char *, char *const[], char *const[]);
143 1.1 pooka
/*
 * Bitmask of host descriptor numbers 0..31 that are treated as rump
 * kernel descriptors without the HIJACK_FDOFF offset.  dup2() sets a
 * bit after a successful rump dup2; close() clears it again.  The mask
 * is handed across execve() in the RUMPHIJACK__DUP2MASK variable.
 *
 * All three macros ignore descriptors outside 0..31.  The range check
 * includes negative values, and the shift is done on an unsigned
 * 32-bit one, so that neither a negative shift count nor a shift into
 * the sign bit (both undefined behaviour) can occur.
 */
static uint32_t dup2mask;
#define	DUP2FD_OK(fd)	((fd) >= 0 && (fd) < 32)
#define	DUP2BIT(fd)	((uint32_t)1 << (fd))
#define	ISDUP2D(fd)	(DUP2FD_OK(fd) && (DUP2BIT(fd) & dup2mask) != 0)
#define	SETDUP2(fd) \
    do { if (DUP2FD_OK(fd)) dup2mask |= DUP2BIT(fd); } while (/*CONSTCOND*/0)
#define	CLRDUP2(fd) \
    do { if (DUP2FD_OK(fd)) dup2mask &= ~DUP2BIT(fd); } while (/*CONSTCOND*/0)
150 1.5 pooka
/*
 * Debug tracing.  With DEBUGJACK defined, DPRINTF((fmt, ...)) prints to
 * stderr via mydprintf(); otherwise DPRINTF expands to nothing and its
 * arguments are not evaluated.
 */
//#define DEBUGJACK
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list ap;

	/* print only while stderr is not marked in dup2mask */
	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(ap, fmt);
		vfprintf(stderr, fmt, ap);
		va_end(ap);
	}
}

#endif
170 1.1 pooka
/*
 * FDCALL() expands to a complete wrapper function which dispatches a
 * descriptor-based call either to the host or to the rump kernel.
 *
 *	type	return type of the wrapper
 *	name	name of the wrapper function
 *	rcname	index into syscalls[] (a DUALCALL_* value)
 *	args	parenthesized parameter list; must contain "int fd"
 *	proto	parenthesized prototype of the underlying call
 *	vars	parenthesized argument list passed to the underlying call
 *
 * For rump descriptors fd is translated with fd_host2rump() before the
 * call is made.
 */
#define FDCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto;						\
									\
	DPRINTF(("%s -> %d\n", __STRING(name), fd));			\
	if (!fd_isrump(fd)) {						\
		fun = syscalls[rcname].bs_host;				\
	} else {							\
		fun = syscalls[rcname].bs_rump;				\
		fd = fd_host2rump(fd);					\
	}								\
									\
	return fun vars;						\
}

186 1.16.2.2 bouyer
187 1.16.2.2 bouyer /*
188 1.16.2.2 bouyer * This is called from librumpclient in case of LD_PRELOAD.
189 1.16.2.2 bouyer * It ensures correct RTLD_NEXT.
190 1.16.2.2 bouyer *
191 1.16.2.2 bouyer * ... except, it's apparently extremely difficult to force
192 1.16.2.2 bouyer * at least gcc to generate an actual stack frame here. So
193 1.16.2.2 bouyer * sprinkle some volatile foobar and baz to throw the optimizer
194 1.16.2.2 bouyer * off the scent and generate a variable assignment with the
195 1.16.2.2 bouyer * return value. The posterboy for this meltdown is amd64
196 1.16.2.2 bouyer * with -O2. At least with gcc 4.1.3 i386 works regardless of
197 1.16.2.2 bouyer * optimization.
198 1.16.2.2 bouyer */
199 1.16.2.2 bouyer volatile int rumphijack_unrope; /* there, unhang yourself */
200 1.16.2.2 bouyer static void *
201 1.16.2.2 bouyer hijackdlsym(void *handle, const char *symbol)
202 1.16.2.2 bouyer {
203 1.16.2.2 bouyer void *rv;
204 1.16.2.2 bouyer
205 1.16.2.2 bouyer rv = dlsym(handle, symbol);
206 1.16.2.2 bouyer rumphijack_unrope = *(volatile int *)rv;
207 1.16.2.2 bouyer
208 1.16.2.2 bouyer return (void *)rv;
209 1.16.2.2 bouyer }
210 1.16.2.2 bouyer
/*
 * When true, AF_LOCAL sockets are created in the host kernel instead of
 * the rump kernel (see __socket30()).
 */
static bool hostlocalsockets = true;
213 1.16.2.2 bouyer
214 1.16.2.2 bouyer static void __attribute__((constructor))
215 1.16.2.2 bouyer rcinit(void)
216 1.16.2.2 bouyer {
217 1.16.2.2 bouyer char buf[64];
218 1.16.2.2 bouyer extern void *(*rumpclient_dlsym)(void *, const char *);
219 1.16.2.2 bouyer unsigned i, j;
220 1.16.2.2 bouyer
221 1.16.2.2 bouyer rumpclient_dlsym = hijackdlsym;
222 1.16.2.2 bouyer host_fork = dlsym(RTLD_NEXT, "fork");
223 1.16.2.2 bouyer host_daemon = dlsym(RTLD_NEXT, "daemon");
224 1.16.2.3 bouyer host_execve = dlsym(RTLD_NEXT, "execve");
225 1.16.2.2 bouyer
226 1.16.2.2 bouyer /*
227 1.16.2.2 bouyer * In theory cannot print anything during lookups because
228 1.16.2.2 bouyer * we might not have the call vector set up. so, the errx()
229 1.16.2.2 bouyer * is a bit of a strech, but it might work.
230 1.16.2.2 bouyer */
231 1.16.2.2 bouyer
232 1.16.2.2 bouyer for (i = 0; i < DUALCALL__NUM; i++) {
233 1.16.2.2 bouyer /* build runtime O(1) access */
234 1.16.2.2 bouyer for (j = 0; j < __arraycount(syscnames); j++) {
235 1.16.2.2 bouyer if (syscnames[j].scm_callnum == i)
236 1.16.2.2 bouyer break;
237 1.16.2.2 bouyer }
238 1.16.2.2 bouyer
239 1.16.2.2 bouyer if (j == __arraycount(syscnames))
240 1.16.2.2 bouyer errx(1, "rumphijack error: syscall pos %d missing", i);
241 1.16.2.2 bouyer
242 1.16.2.2 bouyer syscalls[i].bs_host = dlsym(RTLD_NEXT,
243 1.16.2.2 bouyer syscnames[j].scm_hostname);
244 1.16.2.2 bouyer if (syscalls[i].bs_host == NULL)
245 1.16.2.2 bouyer errx(1, "hostcall %s not found missing",
246 1.16.2.2 bouyer syscnames[j].scm_hostname);
247 1.16.2.2 bouyer
248 1.16.2.2 bouyer syscalls[i].bs_rump = dlsym(RTLD_NEXT,
249 1.16.2.2 bouyer syscnames[j].scm_rumpname);
250 1.16.2.2 bouyer if (syscalls[i].bs_rump == NULL)
251 1.16.2.2 bouyer errx(1, "rumpcall %s not found missing",
252 1.16.2.2 bouyer syscnames[j].scm_rumpname);
253 1.16.2.2 bouyer }
254 1.16.2.2 bouyer
255 1.16.2.2 bouyer if (rumpclient_init() == -1)
256 1.16.2.2 bouyer err(1, "rumpclient init");
257 1.16.2.2 bouyer
258 1.16.2.2 bouyer /* set client persistence level */
259 1.16.2.3 bouyer if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) {
260 1.16.2.2 bouyer if (strcmp(buf, "die") == 0)
261 1.16.2.2 bouyer rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE);
262 1.16.2.2 bouyer else if (strcmp(buf, "inftime") == 0)
263 1.16.2.2 bouyer rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
264 1.16.2.2 bouyer else if (strcmp(buf, "once") == 0)
265 1.16.2.2 bouyer rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE);
266 1.16.2.2 bouyer else {
267 1.16.2.2 bouyer time_t timeout;
268 1.16.2.3 bouyer char *ep;
269 1.16.2.2 bouyer
270 1.16.2.3 bouyer timeout = (time_t)strtoll(buf, &ep, 10);
271 1.16.2.3 bouyer if (timeout <= 0 || ep != buf + strlen(buf))
272 1.16.2.3 bouyer errx(1, "RUMPHIJACK_RETRYCONNECT must be "
273 1.16.2.3 bouyer "keyword or integer, got: %s", buf);
274 1.16.2.2 bouyer
275 1.16.2.2 bouyer rumpclient_setconnretry(timeout);
276 1.16.2.2 bouyer }
277 1.16.2.2 bouyer }
278 1.16.2.3 bouyer
279 1.16.2.3 bouyer if (getenv_r("RUMPHIJACK__DUP2MASK", buf, sizeof(buf)) == 0) {
280 1.16.2.3 bouyer dup2mask = strtoul(buf, NULL, 10);
281 1.16.2.3 bouyer }
282 1.16.2.2 bouyer }
283 1.16.2.2 bouyer
/*
 * Offset added to rump kernel descriptor numbers to form the numbers
 * the application sees.
 * XXX: need runtime selection.  low for now due to FD_SETSIZE
 */
#define HIJACK_FDOFF 128

/*
 * Convert a rump kernel descriptor to its application-visible number.
 * -1 (error) and descriptors marked in dup2mask pass through unchanged.
 */
static int
fd_rump2host(int fd)
{

	if (fd != -1 && !ISDUP2D(fd))
		return fd + HIJACK_FDOFF;

	return fd;
}

298 1.2 pooka
299 1.2 pooka static int
300 1.2 pooka fd_host2rump(int fd)
301 1.2 pooka {
302 1.2 pooka
303 1.2 pooka if (!ISDUP2D(fd))
304 1.2 pooka fd -= HIJACK_FDOFF;
305 1.2 pooka return fd;
306 1.2 pooka }
307 1.2 pooka
308 1.2 pooka static bool
309 1.2 pooka fd_isrump(int fd)
310 1.2 pooka {
311 1.2 pooka
312 1.2 pooka return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
313 1.2 pooka }
314 1.2 pooka
/* assert that _fd_ refers to a rump kernel descriptor */
#define assertfd(_fd_) assert(fd_isrump(_fd_))
316 1.16.2.3 bouyer
317 1.16.2.3 bouyer static int
318 1.16.2.3 bouyer dodup(int oldd, int minfd)
319 1.16.2.3 bouyer {
320 1.16.2.3 bouyer int (*op_fcntl)(int, int, ...);
321 1.16.2.3 bouyer int newd;
322 1.16.2.3 bouyer int isrump;
323 1.16.2.3 bouyer
324 1.16.2.3 bouyer DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd));
325 1.16.2.3 bouyer if (fd_isrump(oldd)) {
326 1.16.2.3 bouyer op_fcntl = GETSYSCALL(rump, FCNTL);
327 1.16.2.3 bouyer oldd = fd_host2rump(oldd);
328 1.16.2.3 bouyer isrump = 1;
329 1.16.2.3 bouyer } else {
330 1.16.2.3 bouyer op_fcntl = GETSYSCALL(host, FCNTL);
331 1.16.2.3 bouyer isrump = 0;
332 1.16.2.3 bouyer }
333 1.16.2.3 bouyer
334 1.16.2.3 bouyer newd = op_fcntl(oldd, F_DUPFD, minfd);
335 1.16.2.3 bouyer
336 1.16.2.3 bouyer if (isrump)
337 1.16.2.3 bouyer newd = fd_rump2host(newd);
338 1.16.2.3 bouyer DPRINTF(("dup <- %d\n", newd));
339 1.16.2.3 bouyer
340 1.16.2.3 bouyer return newd;
341 1.16.2.3 bouyer }
342 1.2 pooka
343 1.1 pooka int __socket30(int, int, int);
344 1.1 pooka int
345 1.1 pooka __socket30(int domain, int type, int protocol)
346 1.1 pooka {
347 1.16.2.2 bouyer int (*op_socket)(int, int, int);
348 1.1 pooka int fd;
349 1.7 pooka bool dohost;
350 1.7 pooka
351 1.7 pooka dohost = hostlocalsockets && (domain == AF_LOCAL);
352 1.1 pooka
353 1.7 pooka if (dohost)
354 1.16.2.2 bouyer op_socket = GETSYSCALL(host, SOCKET);
355 1.7 pooka else
356 1.16.2.2 bouyer op_socket = GETSYSCALL(rump, SOCKET);
357 1.16.2.2 bouyer fd = op_socket(domain, type, protocol);
358 1.2 pooka
359 1.7 pooka if (!dohost)
360 1.7 pooka fd = fd_rump2host(fd);
361 1.7 pooka DPRINTF(("socket <- %d\n", fd));
362 1.2 pooka
363 1.7 pooka return fd;
364 1.1 pooka }
365 1.1 pooka
366 1.1 pooka int
367 1.1 pooka accept(int s, struct sockaddr *addr, socklen_t *addrlen)
368 1.1 pooka {
369 1.16.2.2 bouyer int (*op_accept)(int, struct sockaddr *, socklen_t *);
370 1.1 pooka int fd;
371 1.7 pooka bool isrump;
372 1.7 pooka
373 1.7 pooka isrump = fd_isrump(s);
374 1.1 pooka
375 1.2 pooka DPRINTF(("accept -> %d", s));
376 1.7 pooka if (isrump) {
377 1.16.2.2 bouyer op_accept = GETSYSCALL(rump, ACCEPT);
378 1.7 pooka s = fd_host2rump(s);
379 1.7 pooka } else {
380 1.16.2.2 bouyer op_accept = GETSYSCALL(host, ACCEPT);
381 1.7 pooka }
382 1.16.2.2 bouyer fd = op_accept(s, addr, addrlen);
383 1.7 pooka if (fd != -1 && isrump)
384 1.7 pooka fd = fd_rump2host(fd);
385 1.7 pooka
386 1.7 pooka DPRINTF((" <- %d\n", fd));
387 1.2 pooka
388 1.7 pooka return fd;
389 1.1 pooka }
390 1.1 pooka
391 1.16.2.2 bouyer /*
392 1.16.2.2 bouyer * ioctl and fcntl are varargs calls and need special treatment
393 1.16.2.2 bouyer */
394 1.1 pooka int
395 1.16.2.2 bouyer ioctl(int fd, unsigned long cmd, ...)
396 1.1 pooka {
397 1.16.2.2 bouyer int (*op_ioctl)(int, unsigned long cmd, ...);
398 1.16.2.2 bouyer va_list ap;
399 1.16.2.2 bouyer int rv;
400 1.1 pooka
401 1.16.2.2 bouyer DPRINTF(("ioctl -> %d\n", fd));
402 1.16.2.2 bouyer if (fd_isrump(fd)) {
403 1.16.2.2 bouyer fd = fd_host2rump(fd);
404 1.16.2.2 bouyer op_ioctl = GETSYSCALL(rump, IOCTL);
405 1.7 pooka } else {
406 1.16.2.2 bouyer op_ioctl = GETSYSCALL(host, IOCTL);
407 1.7 pooka }
408 1.2 pooka
409 1.16.2.2 bouyer va_start(ap, cmd);
410 1.16.2.2 bouyer rv = op_ioctl(fd, cmd, va_arg(ap, void *));
411 1.16.2.2 bouyer va_end(ap);
412 1.16.2.2 bouyer return rv;
413 1.1 pooka }
414 1.1 pooka
415 1.16.2.3 bouyer #include <syslog.h>
416 1.1 pooka int
417 1.16.2.2 bouyer fcntl(int fd, int cmd, ...)
418 1.1 pooka {
419 1.16.2.2 bouyer int (*op_fcntl)(int, int, ...);
420 1.16.2.2 bouyer va_list ap;
421 1.16.2.3 bouyer int rv, minfd, i;
422 1.16.2.3 bouyer
423 1.16.2.3 bouyer DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd));
424 1.16.2.3 bouyer
425 1.16.2.3 bouyer switch (cmd) {
426 1.16.2.3 bouyer case F_DUPFD:
427 1.16.2.3 bouyer va_start(ap, cmd);
428 1.16.2.3 bouyer minfd = va_arg(ap, int);
429 1.16.2.3 bouyer va_end(ap);
430 1.16.2.3 bouyer return dodup(fd, minfd);
431 1.16.2.3 bouyer
432 1.16.2.3 bouyer case F_CLOSEM:
433 1.16.2.3 bouyer /*
434 1.16.2.3 bouyer * So, if fd < HIJACKOFF, we want to do a host closem.
435 1.16.2.3 bouyer */
436 1.16.2.3 bouyer
437 1.16.2.3 bouyer if (fd < HIJACK_FDOFF) {
438 1.16.2.3 bouyer int closemfd = fd;
439 1.16.2.3 bouyer
440 1.16.2.3 bouyer if (rumpclient__closenotify(&closemfd,
441 1.16.2.3 bouyer RUMPCLIENT_CLOSE_FCLOSEM) == -1)
442 1.16.2.3 bouyer return -1;
443 1.16.2.3 bouyer op_fcntl = GETSYSCALL(host, FCNTL);
444 1.16.2.3 bouyer rv = op_fcntl(closemfd, cmd);
445 1.16.2.3 bouyer if (rv)
446 1.16.2.3 bouyer return rv;
447 1.16.2.3 bouyer }
448 1.16.2.3 bouyer
449 1.16.2.3 bouyer /*
450 1.16.2.3 bouyer * Additionally, we want to do a rump closem, but only
451 1.16.2.3 bouyer * for the file descriptors not within the dup2mask.
452 1.16.2.3 bouyer */
453 1.16.2.3 bouyer
454 1.16.2.3 bouyer /* why don't we offer fls()? */
455 1.16.2.3 bouyer for (i = 31; i >= 0; i--) {
456 1.16.2.3 bouyer if (dup2mask & 1<<i)
457 1.16.2.3 bouyer break;
458 1.16.2.3 bouyer }
459 1.16.2.3 bouyer
460 1.16.2.3 bouyer if (fd >= HIJACK_FDOFF)
461 1.16.2.3 bouyer fd -= HIJACK_FDOFF;
462 1.16.2.3 bouyer else
463 1.16.2.3 bouyer fd = 0;
464 1.16.2.3 bouyer fd = MAX(i+1, fd);
465 1.16.2.3 bouyer
466 1.16.2.3 bouyer /* hmm, maybe we should close rump fd's not within dup2mask? */
467 1.16.2.3 bouyer
468 1.16.2.3 bouyer return rump_sys_fcntl(fd, F_CLOSEM);
469 1.16.2.3 bouyer
470 1.16.2.3 bouyer case F_MAXFD:
471 1.16.2.3 bouyer /*
472 1.16.2.3 bouyer * For maxfd, if there's a rump kernel fd, return
473 1.16.2.3 bouyer * it hostified. Otherwise, return host's MAXFD
474 1.16.2.3 bouyer * return value.
475 1.16.2.3 bouyer */
476 1.16.2.3 bouyer if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) {
477 1.16.2.3 bouyer /*
478 1.16.2.3 bouyer * This might go a little wrong in case
479 1.16.2.3 bouyer * of dup2 to [012], but I'm not sure if
480 1.16.2.3 bouyer * there's a justification for tracking
481 1.16.2.3 bouyer * that info. Consider e.g.
482 1.16.2.3 bouyer * dup2(rumpfd, 2) followed by rump_sys_open()
483 1.16.2.3 bouyer * returning 1. We should return 1+HIJACKOFF,
484 1.16.2.3 bouyer * not 2+HIJACKOFF. However, if [01] is not
485 1.16.2.3 bouyer * open, the correct return value is 2.
486 1.16.2.3 bouyer */
487 1.16.2.3 bouyer return fd_rump2host(fd);
488 1.16.2.3 bouyer } else {
489 1.16.2.3 bouyer op_fcntl = GETSYSCALL(host, FCNTL);
490 1.16.2.3 bouyer return op_fcntl(fd, F_MAXFD);
491 1.16.2.3 bouyer }
492 1.16.2.3 bouyer /*NOTREACHED*/
493 1.16.2.3 bouyer
494 1.16.2.3 bouyer default:
495 1.16.2.3 bouyer if (fd_isrump(fd)) {
496 1.16.2.3 bouyer fd = fd_host2rump(fd);
497 1.16.2.3 bouyer op_fcntl = GETSYSCALL(rump, FCNTL);
498 1.16.2.3 bouyer } else {
499 1.16.2.3 bouyer op_fcntl = GETSYSCALL(host, FCNTL);
500 1.16.2.3 bouyer }
501 1.16.2.3 bouyer
502 1.16.2.3 bouyer va_start(ap, cmd);
503 1.16.2.3 bouyer rv = op_fcntl(fd, cmd, va_arg(ap, void *));
504 1.16.2.3 bouyer va_end(ap);
505 1.16.2.3 bouyer return rv;
506 1.16.2.3 bouyer }
507 1.16.2.3 bouyer /*NOTREACHED*/
508 1.16.2.3 bouyer }
509 1.16.2.3 bouyer
510 1.16.2.3 bouyer int
511 1.16.2.3 bouyer close(int fd)
512 1.16.2.3 bouyer {
513 1.16.2.3 bouyer int (*op_close)(int);
514 1.16.2.2 bouyer int rv;
515 1.1 pooka
516 1.16.2.3 bouyer DPRINTF(("close -> %d\n", fd));
517 1.16.2.2 bouyer if (fd_isrump(fd)) {
518 1.16.2.3 bouyer int undup2 = 0;
519 1.16.2.3 bouyer
520 1.16.2.3 bouyer if (ISDUP2D(fd))
521 1.16.2.3 bouyer undup2 = 1;
522 1.16.2.2 bouyer fd = fd_host2rump(fd);
523 1.16.2.3 bouyer op_close = GETSYSCALL(rump, CLOSE);
524 1.16.2.3 bouyer rv = op_close(fd);
525 1.16.2.3 bouyer if (rv == 0 && undup2)
526 1.16.2.3 bouyer CLRDUP2(fd);
527 1.16 pooka } else {
528 1.16.2.3 bouyer if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1)
529 1.16.2.3 bouyer return -1;
530 1.16.2.3 bouyer op_close = GETSYSCALL(host, CLOSE);
531 1.16.2.3 bouyer rv = op_close(fd);
532 1.16 pooka }
533 1.16 pooka
534 1.16.2.2 bouyer return rv;
535 1.1 pooka }
536 1.1 pooka
537 1.16.2.2 bouyer /*
538 1.16.2.2 bouyer * write cannot issue a standard debug printf due to recursion
539 1.16.2.2 bouyer */
540 1.1 pooka ssize_t
541 1.16.2.2 bouyer write(int fd, const void *buf, size_t blen)
542 1.1 pooka {
543 1.16.2.2 bouyer ssize_t (*op_write)(int, const void *, size_t);
544 1.1 pooka
545 1.16.2.2 bouyer if (fd_isrump(fd)) {
546 1.16.2.2 bouyer fd = fd_host2rump(fd);
547 1.16.2.2 bouyer op_write = GETSYSCALL(rump, WRITE);
548 1.7 pooka } else {
549 1.16.2.2 bouyer op_write = GETSYSCALL(host, WRITE);
550 1.7 pooka }
551 1.1 pooka
552 1.16.2.2 bouyer return op_write(fd, buf, blen);
553 1.2 pooka }
554 1.2 pooka
555 1.2 pooka /*
556 1.2 pooka * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
557 1.2 pooka * many programs do that. dup2 of a rump kernel fd to another value
558 1.2 pooka * not >= fdoff is an error.
559 1.2 pooka *
560 1.2 pooka * Note: cannot rump2host newd, because it is often hardcoded.
561 1.2 pooka */
562 1.2 pooka int
563 1.2 pooka dup2(int oldd, int newd)
564 1.2 pooka {
565 1.16.2.2 bouyer int (*host_dup2)(int, int);
566 1.2 pooka int rv;
567 1.2 pooka
568 1.2 pooka DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
569 1.2 pooka
570 1.2 pooka if (fd_isrump(oldd)) {
571 1.2 pooka if (!(newd >= 0 && newd <= 2))
572 1.2 pooka return EBADF;
573 1.2 pooka oldd = fd_host2rump(oldd);
574 1.2 pooka rv = rump_sys_dup2(oldd, newd);
575 1.2 pooka if (rv != -1)
576 1.16.2.3 bouyer SETDUP2(newd);
577 1.2 pooka } else {
578 1.16.2.2 bouyer host_dup2 = syscalls[DUALCALL_DUP2].bs_host;
579 1.16.2.3 bouyer if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1)
580 1.16.2.3 bouyer return -1;
581 1.10 pooka rv = host_dup2(oldd, newd);
582 1.2 pooka }
583 1.10 pooka
584 1.10 pooka return rv;
585 1.2 pooka }
586 1.2 pooka
/* dup(): same as fcntl(F_DUPFD) with a minimum descriptor of 0. */
int
dup(int oldd)
{
	const int lowest = 0;

	return dodup(oldd, lowest);
}

593 1.16.2.2 bouyer
594 1.2 pooka pid_t
595 1.2 pooka fork()
596 1.2 pooka {
597 1.2 pooka pid_t rv;
598 1.2 pooka
599 1.2 pooka DPRINTF(("fork\n"));
600 1.2 pooka
601 1.16.2.3 bouyer rv = rumpclient__dofork(host_fork);
602 1.2 pooka
603 1.2 pooka DPRINTF(("fork returns %d\n", rv));
604 1.2 pooka return rv;
605 1.1 pooka }
606 1.16.2.3 bouyer /* we do not have the luxury of not requiring a stackframe */
607 1.16.2.3 bouyer __strong_alias(__vfork14,fork);
608 1.1 pooka
609 1.1 pooka int
610 1.16.2.2 bouyer daemon(int nochdir, int noclose)
611 1.1 pooka {
612 1.16.2.2 bouyer struct rumpclient_fork *rf;
613 1.1 pooka
614 1.16.2.2 bouyer if ((rf = rumpclient_prefork()) == NULL)
615 1.16.2.2 bouyer return -1;
616 1.1 pooka
617 1.16.2.2 bouyer if (host_daemon(nochdir, noclose) == -1)
618 1.16.2.2 bouyer return -1;
619 1.1 pooka
620 1.16.2.2 bouyer if (rumpclient_fork_init(rf) == -1)
621 1.16.2.2 bouyer return -1;
622 1.1 pooka
623 1.16.2.2 bouyer return 0;
624 1.1 pooka }
625 1.1 pooka
626 1.16.2.3 bouyer int
627 1.16.2.3 bouyer execve(const char *path, char *const argv[], char *const envp[])
628 1.16.2.3 bouyer {
629 1.16.2.3 bouyer char buf[128];
630 1.16.2.3 bouyer char *dup2str;
631 1.16.2.3 bouyer char **newenv;
632 1.16.2.3 bouyer size_t nelem;
633 1.16.2.3 bouyer int rv, sverrno;
634 1.16.2.3 bouyer
635 1.16.2.3 bouyer snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2MASK=%u", dup2mask);
636 1.16.2.3 bouyer dup2str = malloc(strlen(buf)+1);
637 1.16.2.3 bouyer if (dup2str == NULL)
638 1.16.2.3 bouyer return ENOMEM;
639 1.16.2.3 bouyer strcpy(dup2str, buf);
640 1.16.2.3 bouyer
641 1.16.2.3 bouyer for (nelem = 0; envp && envp[nelem]; nelem++)
642 1.16.2.3 bouyer continue;
643 1.16.2.3 bouyer newenv = malloc(sizeof(*newenv) * nelem+2);
644 1.16.2.3 bouyer if (newenv == NULL) {
645 1.16.2.3 bouyer free(dup2str);
646 1.16.2.3 bouyer return ENOMEM;
647 1.16.2.3 bouyer }
648 1.16.2.3 bouyer memcpy(newenv, envp, nelem*sizeof(*newenv));
649 1.16.2.3 bouyer newenv[nelem] = dup2str;
650 1.16.2.3 bouyer newenv[nelem+1] = NULL;
651 1.16.2.3 bouyer
652 1.16.2.3 bouyer rv = rumpclient_exec(path, argv, newenv);
653 1.16.2.3 bouyer
654 1.16.2.3 bouyer _DIAGASSERT(rv != 0);
655 1.16.2.3 bouyer sverrno = errno;
656 1.16.2.3 bouyer free(newenv);
657 1.16.2.3 bouyer free(dup2str);
658 1.16.2.3 bouyer errno = sverrno;
659 1.16.2.3 bouyer return rv;
660 1.16.2.3 bouyer }
661 1.16.2.3 bouyer
662 1.16.2.2 bouyer /*
663 1.16.2.2 bouyer * select is done by calling poll.
664 1.16.2.2 bouyer */
665 1.4 pooka int
666 1.16.2.2 bouyer REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
667 1.4 pooka struct timeval *timeout)
668 1.1 pooka {
669 1.4 pooka struct pollfd *pfds;
670 1.4 pooka struct timespec ts, *tsp = NULL;
671 1.16.2.2 bouyer nfds_t realnfds;
672 1.16.2.2 bouyer int i, j;
673 1.4 pooka int rv, incr;
674 1.4 pooka
675 1.7 pooka DPRINTF(("select\n"));
676 1.7 pooka
677 1.4 pooka /*
678 1.4 pooka * Well, first we must scan the fds to figure out how many
679 1.4 pooka * fds there really are. This is because up to and including
680 1.16.2.2 bouyer * nb5 poll() silently refuses nfds > process_maxopen_fds.
681 1.4 pooka * Seems to be fixed in current, thank the maker.
682 1.4 pooka * god damn cluster...bomb.
683 1.4 pooka */
684 1.4 pooka
685 1.4 pooka for (i = 0, realnfds = 0; i < nfds; i++) {
686 1.4 pooka if (readfds && FD_ISSET(i, readfds)) {
687 1.4 pooka realnfds++;
688 1.4 pooka continue;
689 1.4 pooka }
690 1.4 pooka if (writefds && FD_ISSET(i, writefds)) {
691 1.4 pooka realnfds++;
692 1.4 pooka continue;
693 1.4 pooka }
694 1.4 pooka if (exceptfds && FD_ISSET(i, exceptfds)) {
695 1.4 pooka realnfds++;
696 1.4 pooka continue;
697 1.1 pooka }
698 1.1 pooka }
699 1.1 pooka
700 1.6 pooka if (realnfds) {
701 1.16.2.3 bouyer pfds = calloc(realnfds, sizeof(*pfds));
702 1.6 pooka if (!pfds)
703 1.6 pooka return -1;
704 1.6 pooka } else {
705 1.6 pooka pfds = NULL;
706 1.6 pooka }
707 1.1 pooka
708 1.4 pooka for (i = 0, j = 0; i < nfds; i++) {
709 1.4 pooka incr = 0;
710 1.4 pooka if (readfds && FD_ISSET(i, readfds)) {
711 1.4 pooka pfds[j].fd = i;
712 1.4 pooka pfds[j].events |= POLLIN;
713 1.4 pooka incr=1;
714 1.4 pooka }
715 1.4 pooka if (writefds && FD_ISSET(i, writefds)) {
716 1.4 pooka pfds[j].fd = i;
717 1.4 pooka pfds[j].events |= POLLOUT;
718 1.4 pooka incr=1;
719 1.4 pooka }
720 1.4 pooka if (exceptfds && FD_ISSET(i, exceptfds)) {
721 1.4 pooka pfds[j].fd = i;
722 1.4 pooka pfds[j].events |= POLLHUP|POLLERR;
723 1.4 pooka incr=1;
724 1.1 pooka }
725 1.4 pooka if (incr)
726 1.4 pooka j++;
727 1.1 pooka }
728 1.16.2.3 bouyer assert(j == (int)realnfds);
729 1.1 pooka
730 1.4 pooka if (timeout) {
731 1.4 pooka TIMEVAL_TO_TIMESPEC(timeout, &ts);
732 1.4 pooka tsp = &ts;
733 1.4 pooka }
734 1.16.2.2 bouyer rv = REALPOLLTS(pfds, realnfds, tsp, NULL);
735 1.16.2.3 bouyer /*
736 1.16.2.3 bouyer * "If select() returns with an error the descriptor sets
737 1.16.2.3 bouyer * will be unmodified"
738 1.16.2.3 bouyer */
739 1.16.2.3 bouyer if (rv < 0)
740 1.4 pooka goto out;
741 1.4 pooka
742 1.4 pooka /*
743 1.16.2.3 bouyer * zero out results (can't use FD_ZERO for the
744 1.16.2.3 bouyer * obvious select-me-not reason). whee.
745 1.16.2.3 bouyer *
746 1.16.2.3 bouyer * We do this here since some software ignores the return
747 1.16.2.3 bouyer * value of select, and hence if the timeout expires, it may
748 1.16.2.3 bouyer * assume all input descriptors have activity.
749 1.4 pooka */
750 1.4 pooka for (i = 0; i < nfds; i++) {
751 1.4 pooka if (readfds)
752 1.4 pooka FD_CLR(i, readfds);
753 1.4 pooka if (writefds)
754 1.4 pooka FD_CLR(i, writefds);
755 1.4 pooka if (exceptfds)
756 1.4 pooka FD_CLR(i, exceptfds);
757 1.1 pooka }
758 1.16.2.3 bouyer if (rv == 0)
759 1.16.2.3 bouyer goto out;
760 1.1 pooka
761 1.16.2.3 bouyer /*
762 1.16.2.3 bouyer * We have >0 fds with activity. Harvest the results.
763 1.16.2.3 bouyer */
764 1.16.2.2 bouyer for (i = 0; i < (int)realnfds; i++) {
765 1.4 pooka if (readfds) {
766 1.4 pooka if (pfds[i].revents & POLLIN) {
767 1.4 pooka FD_SET(pfds[i].fd, readfds);
768 1.4 pooka }
769 1.4 pooka }
770 1.4 pooka if (writefds) {
771 1.4 pooka if (pfds[i].revents & POLLOUT) {
772 1.4 pooka FD_SET(pfds[i].fd, writefds);
773 1.4 pooka }
774 1.4 pooka }
775 1.4 pooka if (exceptfds) {
776 1.4 pooka if (pfds[i].revents & (POLLHUP|POLLERR)) {
777 1.4 pooka FD_SET(pfds[i].fd, exceptfds);
778 1.4 pooka }
779 1.4 pooka }
780 1.1 pooka }
781 1.1 pooka
782 1.4 pooka out:
783 1.4 pooka free(pfds);
784 1.1 pooka return rv;
785 1.1 pooka }
786 1.1 pooka
/*
 * Classify the descriptors of a poll vector: *rumpcall is incremented
 * for every rump kernel descriptor and *hostcall for every host one.
 * Entries with fd == -1 are not counted.  The counters are not reset
 * here; the caller provides their initial values.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t i;

	for (i = 0; i < nfds; i++) {
		const int fd = fds[i].fd;

		if (fd != -1)
			(*(fd_isrump(fd) ? rumpcall : hostcall))++;
	}
}

802 1.1 pooka
/*
 * Rewrite the descriptor of each of the nfds entries in fds with the
 * result of applying fdadj to it.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{

	while (nfds-- > 0) {
		fds->fd = fdadj(fds->fd);
		fds++;
	}
}

812 1.1 pooka
/*
 * poll is easy as long as the call comes in the fds only in one
 * kernel.  otherwise it's quite tricky...
 *
 * struct pollarg carries the arguments for, and the error result of,
 * a poll performed by hostpoll().
 */
struct pollarg {
	/* arguments passed on to the host's pollts */
	struct pollfd *pfds;
	nfds_t nfds;
	const struct timespec *ts;
	const sigset_t *sigmask;

	int pipefd;	/* rump descriptor the poll result is written to */
	int errnum;	/* errno of the poll, set only if it returned -1 */
};

825 1.1 pooka
826 1.1 pooka static void *
827 1.1 pooka hostpoll(void *arg)
828 1.1 pooka {
829 1.16.2.2 bouyer int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
830 1.16.2.2 bouyer const sigset_t *);
831 1.1 pooka struct pollarg *parg = arg;
832 1.1 pooka intptr_t rv;
833 1.1 pooka
834 1.16.2.3 bouyer op_pollts = GETSYSCALL(host, POLLTS);
835 1.16.2.2 bouyer rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
836 1.1 pooka if (rv == -1)
837 1.1 pooka parg->errnum = errno;
838 1.1 pooka rump_sys_write(parg->pipefd, &rv, sizeof(rv));
839 1.1 pooka
840 1.1 pooka return (void *)(intptr_t)rv;
841 1.1 pooka }
842 1.1 pooka
843 1.1 pooka int
844 1.16.2.2 bouyer REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
845 1.3 pooka const sigset_t *sigmask)
846 1.1 pooka {
847 1.3 pooka int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
848 1.3 pooka const sigset_t *);
849 1.16.2.2 bouyer int (*host_close)(int);
850 1.1 pooka int hostcall = 0, rumpcall = 0;
851 1.1 pooka pthread_t pt;
852 1.1 pooka nfds_t i;
853 1.1 pooka int rv;
854 1.1 pooka
855 1.2 pooka DPRINTF(("poll\n"));
856 1.1 pooka checkpoll(fds, nfds, &hostcall, &rumpcall);
857 1.1 pooka
858 1.1 pooka if (hostcall && rumpcall) {
859 1.1 pooka struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
860 1.1 pooka int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
861 1.1 pooka struct pollarg parg;
862 1.1 pooka uintptr_t lrv;
863 1.1 pooka int sverrno = 0, trv;
864 1.1 pooka
865 1.1 pooka /*
866 1.1 pooka * ok, this is where it gets tricky. We must support
867 1.1 pooka * this since it's a very common operation in certain
868 1.1 pooka * types of software (telnet, netcat, etc). We allocate
869 1.1 pooka * two vectors and run two poll commands in separate
870 1.1 pooka * threads. Whichever returns first "wins" and the
871 1.1 pooka * other kernel's fds won't show activity.
872 1.1 pooka */
873 1.1 pooka rv = -1;
874 1.1 pooka
875 1.1 pooka /* allocate full vector for O(n) joining after call */
876 1.1 pooka pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
877 1.1 pooka if (!pfd_host)
878 1.1 pooka goto out;
879 1.1 pooka pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
880 1.1 pooka if (!pfd_rump) {
881 1.1 pooka goto out;
882 1.1 pooka }
883 1.1 pooka
884 1.1 pooka /* split vectors */
885 1.1 pooka for (i = 0; i < nfds; i++) {
886 1.3 pooka if (fds[i].fd == -1) {
887 1.3 pooka pfd_host[i].fd = -1;
888 1.3 pooka pfd_rump[i].fd = -1;
889 1.3 pooka } else if (fd_isrump(fds[i].fd)) {
890 1.2 pooka pfd_host[i].fd = -1;
891 1.2 pooka pfd_rump[i].fd = fd_host2rump(fds[i].fd);
892 1.2 pooka pfd_rump[i].events = fds[i].events;
893 1.2 pooka } else {
894 1.2 pooka pfd_rump[i].fd = -1;
895 1.1 pooka pfd_host[i].fd = fds[i].fd;
896 1.1 pooka pfd_host[i].events = fds[i].events;
897 1.1 pooka }
898 1.16.2.3 bouyer pfd_rump[i].revents = pfd_host[i].revents = 0;
899 1.13 pooka fds[i].revents = 0;
900 1.1 pooka }
901 1.1 pooka
902 1.1 pooka /*
903 1.1 pooka * then, open two pipes, one for notifications
904 1.1 pooka * to each kernel.
905 1.1 pooka */
906 1.1 pooka if (rump_sys_pipe(rpipe) == -1)
907 1.1 pooka goto out;
908 1.1 pooka if (pipe(hpipe) == -1)
909 1.1 pooka goto out;
910 1.1 pooka
911 1.1 pooka pfd_host[nfds].fd = hpipe[0];
912 1.1 pooka pfd_host[nfds].events = POLLIN;
913 1.1 pooka pfd_rump[nfds].fd = rpipe[0];
914 1.1 pooka pfd_rump[nfds].events = POLLIN;
915 1.1 pooka
916 1.1 pooka /*
917 1.1 pooka * then, create a thread to do host part and meanwhile
918 1.1 pooka * do rump kernel part right here
919 1.1 pooka */
920 1.1 pooka
921 1.1 pooka parg.pfds = pfd_host;
922 1.1 pooka parg.nfds = nfds+1;
923 1.3 pooka parg.ts = ts;
924 1.3 pooka parg.sigmask = sigmask;
925 1.1 pooka parg.pipefd = rpipe[1];
926 1.1 pooka pthread_create(&pt, NULL, hostpoll, &parg);
927 1.1 pooka
928 1.16.2.3 bouyer op_pollts = GETSYSCALL(rump, POLLTS);
929 1.3 pooka lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
930 1.1 pooka sverrno = errno;
931 1.1 pooka write(hpipe[1], &rv, sizeof(rv));
932 1.1 pooka pthread_join(pt, (void *)&trv);
933 1.1 pooka
934 1.1 pooka /* check who "won" and merge results */
935 1.1 pooka if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
936 1.1 pooka rv = trv;
937 1.1 pooka
938 1.1 pooka for (i = 0; i < nfds; i++) {
939 1.1 pooka if (pfd_rump[i].fd != -1)
940 1.1 pooka fds[i].revents = pfd_rump[i].revents;
941 1.1 pooka }
942 1.1 pooka sverrno = parg.errnum;
943 1.1 pooka } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
944 1.1 pooka rv = trv;
945 1.1 pooka
946 1.1 pooka for (i = 0; i < nfds; i++) {
947 1.1 pooka if (pfd_host[i].fd != -1)
948 1.1 pooka fds[i].revents = pfd_host[i].revents;
949 1.1 pooka }
950 1.1 pooka } else {
951 1.1 pooka rv = 0;
952 1.1 pooka }
953 1.1 pooka
954 1.1 pooka out:
955 1.16.2.3 bouyer host_close = GETSYSCALL(host, CLOSE);
956 1.1 pooka if (rpipe[0] != -1)
957 1.1 pooka rump_sys_close(rpipe[0]);
958 1.1 pooka if (rpipe[1] != -1)
959 1.1 pooka rump_sys_close(rpipe[1]);
960 1.1 pooka if (hpipe[0] != -1)
961 1.9 pooka host_close(hpipe[0]);
962 1.1 pooka if (hpipe[1] != -1)
963 1.9 pooka host_close(hpipe[1]);
964 1.1 pooka free(pfd_host);
965 1.1 pooka free(pfd_rump);
966 1.1 pooka errno = sverrno;
967 1.1 pooka } else {
968 1.1 pooka if (hostcall) {
969 1.16.2.3 bouyer op_pollts = GETSYSCALL(host, POLLTS);
970 1.1 pooka } else {
971 1.16.2.3 bouyer op_pollts = GETSYSCALL(rump, POLLTS);
972 1.2 pooka adjustpoll(fds, nfds, fd_host2rump);
973 1.1 pooka }
974 1.1 pooka
975 1.3 pooka rv = op_pollts(fds, nfds, ts, sigmask);
976 1.1 pooka if (rumpcall)
977 1.2 pooka adjustpoll(fds, nfds, fd_rump2host);
978 1.1 pooka }
979 1.1 pooka
980 1.1 pooka return rv;
981 1.1 pooka }
982 1.1 pooka
983 1.1 pooka int
984 1.16.2.2 bouyer poll(struct pollfd *fds, nfds_t nfds, int timeout)
985 1.1 pooka {
986 1.3 pooka struct timespec ts;
987 1.3 pooka struct timespec *tsp = NULL;
988 1.3 pooka
989 1.3 pooka if (timeout != INFTIM) {
990 1.3 pooka ts.tv_sec = timeout / 1000;
991 1.11 pooka ts.tv_nsec = (timeout % 1000) * 1000*1000;
992 1.3 pooka
993 1.3 pooka tsp = &ts;
994 1.3 pooka }
995 1.1 pooka
996 1.16.2.2 bouyer return REALPOLLTS(fds, nfds, tsp, NULL);
997 1.1 pooka }
998 1.10 pooka
999 1.10 pooka int
1000 1.16.2.2 bouyer REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges,
1001 1.16.2.2 bouyer struct kevent *eventlist, size_t nevents,
1002 1.16.2.2 bouyer const struct timespec *timeout)
1003 1.10 pooka {
1004 1.16.2.2 bouyer int (*op_kevent)(int, const struct kevent *, size_t,
1005 1.16.2.2 bouyer struct kevent *, size_t, const struct timespec *);
1006 1.16.2.2 bouyer const struct kevent *ev;
1007 1.16.2.2 bouyer size_t i;
1008 1.10 pooka
1009 1.16.2.2 bouyer /*
1010 1.16.2.2 bouyer * Check that we don't attempt to kevent rump kernel fd's.
1011 1.16.2.2 bouyer * That needs similar treatment to select/poll, but is slightly
1012 1.16.2.2 bouyer * trickier since we need to manage to different kq descriptors.
1013 1.16.2.2 bouyer * (TODO, in case you're wondering).
1014 1.16.2.2 bouyer */
1015 1.16.2.2 bouyer for (i = 0; i < nchanges; i++) {
1016 1.16.2.2 bouyer ev = &changelist[i];
1017 1.16.2.2 bouyer if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE ||
1018 1.16.2.2 bouyer ev->filter == EVFILT_VNODE) {
1019 1.16.2.3 bouyer if (fd_isrump((int)ev->ident))
1020 1.16.2.2 bouyer return ENOTSUP;
1021 1.16.2.2 bouyer }
1022 1.16.2.2 bouyer }
1023 1.16.2.2 bouyer
1024 1.16.2.3 bouyer op_kevent = GETSYSCALL(host, KEVENT);
1025 1.16.2.2 bouyer return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout);
1026 1.10 pooka }
1027 1.10 pooka
/*
 * Rest are std type calls.
 *
 * Each FDCALL() invocation below passes, in order: a return type, the
 * function name, the DUALCALL_* identifier of the call, the full
 * parameter list, the same list as bare types, and the list of
 * argument names.
 *
 * NOTE(review): FDCALL itself is defined outside this section;
 * presumably it emits a wrapper that forwards to the host or the rump
 * kernel depending on "fd" -- check the macro before renaming any of
 * the parameters used here.
 */

/* socket address and connection setup calls */
FDCALL(int, bind, DUALCALL_BIND,					\
	(int fd, const struct sockaddr *name, socklen_t namelen),	\
	(int, const struct sockaddr *, socklen_t),			\
	(fd, name, namelen))

FDCALL(int, connect, DUALCALL_CONNECT,					\
	(int fd, const struct sockaddr *name, socklen_t namelen),	\
	(int, const struct sockaddr *, socklen_t),			\
	(fd, name, namelen))

FDCALL(int, getpeername, DUALCALL_GETPEERNAME,				\
	(int fd, struct sockaddr *name, socklen_t *namelen),		\
	(int, struct sockaddr *, socklen_t *),				\
	(fd, name, namelen))

FDCALL(int, getsockname, DUALCALL_GETSOCKNAME,				\
	(int fd, struct sockaddr *name, socklen_t *namelen),		\
	(int, struct sockaddr *, socklen_t *),				\
	(fd, name, namelen))

FDCALL(int, listen, DUALCALL_LISTEN,					\
	(int fd, int backlog),						\
	(int, int),							\
	(fd, backlog))

/* socket data transfer calls */
FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM,				\
	(int fd, void *buf, size_t len, int flags,			\
	    struct sockaddr *from, socklen_t *fromlen),			\
	(int, void *, size_t, int, struct sockaddr *, socklen_t *),	\
	(fd, buf, len, flags, from, fromlen))

FDCALL(ssize_t, sendto, DUALCALL_SENDTO,				\
	(int fd, const void *buf, size_t len, int flags,		\
	    const struct sockaddr *to, socklen_t tolen),		\
	(int, const void *, size_t, int,				\
	    const struct sockaddr *, socklen_t),			\
	(fd, buf, len, flags, to, tolen))

FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG,				\
	(int fd, struct msghdr *msg, int flags),			\
	(int, struct msghdr *, int),					\
	(fd, msg, flags))

FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG,				\
	(int fd, const struct msghdr *msg, int flags),			\
	(int, const struct msghdr *, int),				\
	(fd, msg, flags))

/* socket option and teardown calls */
FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT,				\
	(int fd, int level, int optn, void *optval, socklen_t *optlen),	\
	(int, int, int, void *, socklen_t *),				\
	(fd, level, optn, optval, optlen))

FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT,				\
	(int fd, int level, int optn,					\
	    const void *optval, socklen_t optlen),			\
	(int, int, int, const void *, socklen_t),			\
	(fd, level, optn, optval, optlen))

FDCALL(int, shutdown, DUALCALL_SHUTDOWN,				\
	(int fd, int how),						\
	(int, int),							\
	(fd, how))
1095 1.16.2.2 bouyer
/*
 * STUB(fun) yields the symbol name under which a function is defined.
 * With _FORTIFY_SOURCE enabled it is __ssp_weak_name(fun), which this
 * file defines as _hijack_<fun>; otherwise it is plain <fun>.
 *
 * In the fortified case _hijack_readlink and _hijack_getcwd are also
 * provided here as plain forwarders to _sys_readlink and _sys_getcwd.
 * NOTE(review): presumably these are what the fortified header wrappers
 * end up calling -- confirm against the ssp headers.
 */
#if _FORTIFY_SOURCE > 0
#define STUB(fun) __ssp_weak_name(fun)
/* declared here by hand; no header in view provides this prototype */
ssize_t _sys_readlink(const char * __restrict, char * __restrict, size_t);
ssize_t
STUB(readlink)(const char * __restrict path, char * __restrict buf,
	size_t bufsiz)
{
	return _sys_readlink(path, buf, bufsiz);
}

/* declared here by hand; no header in view provides this prototype */
char *_sys_getcwd(char *, size_t);
char *
STUB(getcwd)(char *buf, size_t size)
{
	return _sys_getcwd(buf, size);
}
#else
#define STUB(fun) fun
#endif
1115 1.16.2.2 bouyer
/*
 * Descriptor I/O calls, declared through FDCALL() with: return type,
 * function name, DUALCALL_* identifier, parameter list, bare type list
 * and argument name list.
 *
 * NOTE(review): REALREAD is not defined in this section; presumably it
 * is a macro naming the actual read symbol -- confirm where it is
 * defined.
 */
FDCALL(ssize_t, REALREAD, DUALCALL_READ,				\
	(int fd, void *buf, size_t buflen),				\
	(int, void *, size_t),						\
	(fd, buf, buflen))

FDCALL(ssize_t, readv, DUALCALL_READV,					\
	(int fd, const struct iovec *iov, int iovcnt),			\
	(int, const struct iovec *, int),				\
	(fd, iov, iovcnt))

FDCALL(ssize_t, writev, DUALCALL_WRITEV,				\
	(int fd, const struct iovec *iov, int iovcnt),			\
	(int, const struct iovec *, int),				\
	(fd, iov, iovcnt))
1130