hijack.c revision 1.55 1 /* $NetBSD: hijack.c,v 1.55 2011/02/19 13:07:53 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.55 2011/02/19 13:07:53 pooka Exp $");
30
31 #define __ssp_weak_name(fun) _hijack_ ## fun
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/event.h>
36 #include <sys/ioctl.h>
37 #include <sys/mount.h>
38 #include <sys/poll.h>
39 #include <sys/socket.h>
40 #include <sys/statvfs.h>
41
42 #include <rump/rumpclient.h>
43 #include <rump/rump_syscalls.h>
44
45 #include <assert.h>
46 #include <dlfcn.h>
47 #include <err.h>
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <poll.h>
51 #include <pthread.h>
52 #include <signal.h>
53 #include <stdarg.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <time.h>
59 #include <unistd.h>
60
/*
 * One identifier per system call that can be dispatched to either the
 * host kernel or the rump kernel.  The values are used as indices into
 * syscalls[]; DUALCALL__NUM must stay last since it sizes that array.
 */
enum dualcall {
	/* descriptor output and control */
	DUALCALL_WRITE,
	DUALCALL_WRITEV,
	DUALCALL_IOCTL,
	DUALCALL_FCNTL,

	/* sockets */
	DUALCALL_SOCKET,
	DUALCALL_ACCEPT,
	DUALCALL_BIND,
	DUALCALL_CONNECT,
	DUALCALL_GETPEERNAME,
	DUALCALL_GETSOCKNAME,
	DUALCALL_LISTEN,
	DUALCALL_RECVFROM,
	DUALCALL_RECVMSG,
	DUALCALL_SENDTO,
	DUALCALL_SENDMSG,
	DUALCALL_GETSOCKOPT,
	DUALCALL_SETSOCKOPT,
	DUALCALL_SHUTDOWN,

	/* descriptor input, duplication, teardown, event waiting */
	DUALCALL_READ,
	DUALCALL_READV,
	DUALCALL_DUP2,
	DUALCALL_CLOSE,
	DUALCALL_POLLTS,
	DUALCALL_KEVENT,

	/* file attributes */
	DUALCALL_STAT,
	DUALCALL_LSTAT,
	DUALCALL_FSTAT,
	DUALCALL_CHMOD,
	DUALCALL_LCHMOD,
	DUALCALL_FCHMOD,
	DUALCALL_CHOWN,
	DUALCALL_LCHOWN,
	DUALCALL_FCHOWN,

	/* namespace and file contents */
	DUALCALL_OPEN,
	DUALCALL_STATVFS1,
	DUALCALL_FSTATVFS1,
	DUALCALL_CHDIR,
	DUALCALL_FCHDIR,
	DUALCALL_LSEEK,
	DUALCALL_GETDENTS,
	DUALCALL_UNLINK,
	DUALCALL_SYMLINK,
	DUALCALL_READLINK,
	DUALCALL_RENAME,
	DUALCALL_MKDIR,
	DUALCALL_RMDIR,
	DUALCALL_UTIMES,
	DUALCALL_LUTIMES,
	DUALCALL_FUTIMES,
	DUALCALL_TRUNCATE,
	DUALCALL_FTRUNCATE,
	DUALCALL_FSYNC,
	DUALCALL_FSYNC_RANGE,
	DUALCALL_MOUNT,
	DUALCALL_UNMOUNT,

	DUALCALL__NUM
};
92
93 #define RSYS_STRING(a) __STRING(a)
94 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
95
96 /*
97 * Would be nice to get this automatically in sync with libc.
98 * Also, this does not work for compat-using binaries!
99 */
100 #if !__NetBSD_Prereq__(5,99,7)
101 #define REALSELECT select
102 #define REALPOLLTS pollts
103 #define REALKEVENT kevent
104 #define REALSTAT __stat30
105 #define REALLSTAT __lstat30
106 #define REALFSTAT __fstat30
107 #define REALUTIMES utimes
108 #define REALLUTIMES lutimes
109 #define REALFUTIMES futimes
110 #else
111 #define REALSELECT _sys___select50
112 #define REALPOLLTS _sys___pollts50
113 #define REALKEVENT _sys___kevent50
114 #define REALSTAT __stat50
115 #define REALLSTAT __lstat50
116 #define REALFSTAT __fstat50
117 #define REALUTIMES __utimes50
118 #define REALLUTIMES __lutimes50
119 #define REALFUTIMES __futimes50
120 #endif
121 #define REALREAD _sys_read
122 #define REALGETDENTS __getdents30
123 #define REALMOUNT __mount50
124 #define REALLSEEK _lseek
125
/*
 * Prototypes for the versioned entry points named by the REAL* macros;
 * the public headers do not declare them under these names.
 */
int	REALSELECT(int nfds, fd_set *readfds, fd_set *writefds,
	    fd_set *exceptfds, struct timeval *timeout);
int	REALPOLLTS(struct pollfd *fds, nfds_t nfds,
	    const struct timespec *ts, const sigset_t *sigmask);
int	REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges,
	    struct kevent *eventlist, size_t nevents,
	    const struct timespec *timeout);
ssize_t	REALREAD(int fd, void *buf, size_t blen);
int	REALSTAT(const char *path, struct stat *sb);
int	REALLSTAT(const char *path, struct stat *sb);
int	REALFSTAT(int fd, struct stat *sb);
int	REALGETDENTS(int fd, char *buf, size_t blen);
int	REALUTIMES(const char *path, const struct timeval tv[2]);
int	REALLUTIMES(const char *path, const struct timeval tv[2]);
int	REALFUTIMES(int fd, const struct timeval tv[2]);
int	REALMOUNT(const char *type, const char *dir, int flags,
	    void *data, size_t dlen);
off_t	REALLSEEK(int fd, off_t offset, int whence);
141
142 #define S(a) __STRING(a)
143 struct sysnames {
144 enum dualcall scm_callnum;
145 const char *scm_hostname;
146 const char *scm_rumpname;
147 } syscnames[] = {
148 { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) },
149 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) },
150 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) },
151 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) },
152 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) },
153 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) },
154 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) },
155 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) },
156 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) },
157 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) },
158 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) },
159 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) },
160 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) },
161 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) },
162 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) },
163 { DUALCALL_READV, "readv", RSYS_NAME(READV) },
164 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) },
165 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) },
166 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) },
167 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) },
168 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) },
169 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) },
170 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) },
171 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) },
172 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) },
173 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) },
174 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) },
175 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) },
176 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) },
177 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) },
178 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) },
179 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) },
180 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) },
181 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) },
182 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) },
183 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) },
184 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) },
185 { DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) },
186 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) },
187 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) },
188 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) },
189 { DUALCALL_LSEEK, S(REALLSEEK), RSYS_NAME(LSEEK) },
190 { DUALCALL_GETDENTS, "__getdents30", RSYS_NAME(GETDENTS) },
191 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) },
192 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) },
193 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) },
194 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) },
195 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) },
196 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) },
197 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) },
198 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) },
199 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) },
200 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) },
201 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) },
202 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) },
203 };
204 #undef S
205
/*
 * Resolved entry points, indexed by enum dualcall: one pointer for the
 * host implementation and one for the rump kernel implementation.
 * The table is filled in by rcinit().
 */
struct bothsys {
	void *bs_host;
	void *bs_rump;
};
struct bothsys syscalls[DUALCALL__NUM];

/* GETSYSCALL(host, OPEN) -> syscalls[DUALCALL_OPEN].bs_host */
#define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which

/* host versions of the process-level calls wrapped further below */
pid_t	(*host_fork)(void);
int	(*host_daemon)(int, int);
int	(*host_execve)(const char *, char *const[], char *const[]);
215
/*
 * Bookkeeping for rump kernel descriptors that were dup2()'d onto low
 * host descriptor numbers.  Two bits are kept per descriptor 0..15:
 *
 *   bit fd       descriptor fd refers to a rump kernel descriptor
 *   bit fd+16    fd is additionally an alias of fd+HIJACKOFF
 *
 * All macros ignore descriptors outside 0..15.  The range check must
 * also reject negative values (e.g. the -1 of a failed call), since a
 * negative shift count is undefined behavior; the shifts are done on
 * an unsigned 32-bit value so that bit 31 can be used safely.
 */
static uint32_t dup2mask;
#define DUP2TRACKED(fd)	((fd) >= 0 && (fd) < 16)
#define DUP2BIT(fd)	((uint32_t)1 << (fd))
#define DUP2ALIASBIT(fd) ((uint32_t)1 << ((fd)+16))

#define ISDUP2D(fd) (DUP2TRACKED(fd) && (DUP2BIT(fd) & dup2mask) != 0)
#define SETDUP2(fd) \
    do { if (DUP2TRACKED(fd)) dup2mask |= DUP2BIT(fd); } while (/*CONSTCOND*/0)
#define CLRDUP2(fd) \
    do { if (DUP2TRACKED(fd)) dup2mask &= ~DUP2BIT(fd); } while (/*CONSTCOND*/0)
#define ISDUP2ALIAS(fd) \
    (DUP2TRACKED(fd) && (DUP2ALIASBIT(fd) & dup2mask) != 0)
#define SETDUP2ALIAS(fd) \
    do { if (DUP2TRACKED(fd)) dup2mask |= DUP2ALIASBIT(fd); } while (/*CONSTCOND*/0)
#define CLRDUP2ALIAS(fd) \
    do { if (DUP2TRACKED(fd)) dup2mask &= ~DUP2ALIASBIT(fd); } while (/*CONSTCOND*/0)
228
/*
 * Debug output.  Enable by defining DEBUGJACK; DPRINTF takes a
 * double-parenthesized printf argument list and compiles to nothing
 * otherwise.
 */
//#define DEBUGJACK
#ifndef DEBUGJACK
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	/* stay silent if stderr has been dup2'd to a rump descriptor */
	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#endif
248
/*
 * Generate a wrapper for a call whose target kernel is decided by a
 * descriptor argument named "fd".  The host implementation is the
 * default; for a rump descriptor the rump implementation is used and
 * fd is translated to the rump kernel's numbering before the call.
 */
#define FDCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto = syscalls[rcname].bs_host;			\
									\
	DPRINTF(("%s -> %d\n", __STRING(name), fd));			\
	if (fd_isrump(fd)) {						\
		fun = syscalls[rcname].bs_rump;				\
		fd = fd_host2rump(fd);					\
	}								\
									\
	return fun vars;						\
}
264
/*
 * Generate a wrapper for a call whose target kernel is decided by a
 * pathname argument named "path".  The host implementation is the
 * default; for a rump path the rump implementation is used and the
 * path is rewritten to the rump kernel's namespace before the call.
 */
#define PATHCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto = syscalls[rcname].bs_host;			\
									\
	DPRINTF(("%s -> %s\n", __STRING(name), path));			\
	if (path_isrump(path)) {					\
		fun = syscalls[rcname].bs_rump;				\
		path = path_host2rump(path);				\
	}								\
									\
	return fun vars;						\
}
280
/*
 * This is called from librumpclient in case of LD_PRELOAD.
 * It ensures correct RTLD_NEXT.
 *
 * ... except, it's apparently extremely difficult to force
 * at least gcc to generate an actual stack frame here.  So
 * sprinkle some volatile foobar and baz to throw the optimizer
 * off the scent and generate a variable assignment with the
 * return value.  The posterboy for this meltdown is amd64
 * with -O2.  At least with gcc 4.1.3 i386 works regardless of
 * optimization.
 *
 * Returns whatever dlsym() returns, including NULL for a symbol that
 * cannot be found.  The volatile read is done only for a non-NULL
 * result, so a failed lookup is reported to the caller instead of
 * faulting here.
 */
volatile int rumphijack_unrope; /* there, unhang yourself */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *rv;

	rv = dlsym(handle, symbol);
	if (rv != NULL)
		rumphijack_unrope = *(volatile int *)rv;

	return rv;
}
304
305 /*
306 * This tracks if our process is in a subdirectory of /rump.
307 * It's preserved over exec.
308 */
309 static bool pwdinrump = false;
310
311 /*
312 * These variables are set from the RUMPHIJACK string and control
313 * which operations can product rump kernel file descriptors.
314 * This should be easily extendable for future needs.
315 */
316 #define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal"
317 static bool rumpsockets[PF_MAX];
318 static const char *rumpprefix;
319 static size_t rumpprefixlen;
320
321 static struct {
322 int pf;
323 const char *name;
324 } socketmap[] = {
325 { PF_LOCAL, "local" },
326 { PF_INET, "inet" },
327 { PF_LINK, "link" },
328 #ifdef PF_OROUTE
329 { PF_OROUTE, "oroute" },
330 #endif
331 { PF_ROUTE, "route" },
332 { PF_INET6, "inet6" },
333 #ifdef PF_MPLS
334 { PF_MPLS, "mpls" },
335 #endif
336 { -1, NULL }
337 };
338
339 static void
340 sockparser(char *buf)
341 {
342 char *p, *l;
343 bool value;
344 int i;
345
346 /* if "all" is present, it must be specified first */
347 if (strncmp(buf, "all", strlen("all")) == 0) {
348 for (i = 0; i < (int)__arraycount(rumpsockets); i++) {
349 rumpsockets[i] = true;
350 }
351 buf += strlen("all");
352 if (*buf == ':')
353 buf++;
354 }
355
356 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) {
357 value = true;
358 if (strncmp(p, "no", strlen("no")) == 0) {
359 value = false;
360 p += strlen("no");
361 }
362
363 for (i = 0; socketmap[i].name; i++) {
364 if (strcmp(p, socketmap[i].name) == 0) {
365 rumpsockets[socketmap[i].pf] = value;
366 break;
367 }
368 }
369 if (socketmap[i].name == NULL) {
370 warnx("invalid socket specifier %s", p);
371 }
372 }
373 }
374
375 static void
376 pathparser(char *buf)
377 {
378
379 if (*buf != '/')
380 errx(1, "hijack path specifier must begin with ``/''");
381
382 if ((rumpprefix = strdup(buf)) == NULL)
383 err(1, "strdup");
384 rumpprefixlen = strlen(rumpprefix);
385 }
386
387 static struct {
388 void (*parsefn)(char *);
389 const char *name;
390 } hijackparse[] = {
391 { sockparser, "socket" },
392 { pathparser, "path" },
393 { NULL, NULL },
394 };
395
396 static void
397 parsehijack(char *hijack)
398 {
399 char *p, *p2, *l;
400 const char *hijackcopy;
401 int i;
402
403 if ((hijackcopy = strdup(hijack)) == NULL)
404 err(1, "strdup");
405
406 /* disable everything explicitly */
407 for (i = 0; i < PF_MAX; i++)
408 rumpsockets[i] = false;
409
410 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) {
411 p2 = strchr(p, '=');
412 if (!p2)
413 errx(1, "invalid hijack specifier: %s", hijackcopy);
414
415 for (i = 0; hijackparse[i].parsefn; i++) {
416 if (strncmp(hijackparse[i].name, p,
417 (size_t)(p2-p)) == 0) {
418 hijackparse[i].parsefn(p2+1);
419 break;
420 }
421 }
422 }
423
424 }
425
426 static void __attribute__((constructor))
427 rcinit(void)
428 {
429 char buf[1024];
430 extern void *(*rumpclient_dlsym)(void *, const char *);
431 unsigned i, j;
432
433 rumpclient_dlsym = hijackdlsym;
434 host_fork = dlsym(RTLD_NEXT, "fork");
435 host_daemon = dlsym(RTLD_NEXT, "daemon");
436 host_execve = dlsym(RTLD_NEXT, "execve");
437
438 /*
439 * In theory cannot print anything during lookups because
440 * we might not have the call vector set up. so, the errx()
441 * is a bit of a strech, but it might work.
442 */
443
444 for (i = 0; i < DUALCALL__NUM; i++) {
445 /* build runtime O(1) access */
446 for (j = 0; j < __arraycount(syscnames); j++) {
447 if (syscnames[j].scm_callnum == i)
448 break;
449 }
450
451 if (j == __arraycount(syscnames))
452 errx(1, "rumphijack error: syscall pos %d missing", i);
453
454 syscalls[i].bs_host = dlsym(RTLD_NEXT,
455 syscnames[j].scm_hostname);
456 if (syscalls[i].bs_host == NULL)
457 errx(1, "hostcall %s not found missing",
458 syscnames[j].scm_hostname);
459
460 syscalls[i].bs_rump = dlsym(RTLD_NEXT,
461 syscnames[j].scm_rumpname);
462 if (syscalls[i].bs_rump == NULL)
463 errx(1, "rumpcall %s not found missing",
464 syscnames[j].scm_rumpname);
465 }
466
467 if (rumpclient_init() == -1)
468 err(1, "rumpclient init");
469
470 /* check which syscalls we're supposed to hijack */
471 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) {
472 strcpy(buf, RUMPHIJACK_DEFAULT);
473 }
474 parsehijack(buf);
475
476 /* set client persistence level */
477 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) {
478 if (strcmp(buf, "die") == 0)
479 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE);
480 else if (strcmp(buf, "inftime") == 0)
481 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
482 else if (strcmp(buf, "once") == 0)
483 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE);
484 else {
485 time_t timeout;
486 char *ep;
487
488 timeout = (time_t)strtoll(buf, &ep, 10);
489 if (timeout <= 0 || ep != buf + strlen(buf))
490 errx(1, "RUMPHIJACK_RETRYCONNECT must be "
491 "keyword or integer, got: %s", buf);
492
493 rumpclient_setconnretry(timeout);
494 }
495 }
496
497 if (getenv_r("RUMPHIJACK__DUP2MASK", buf, sizeof(buf)) == 0) {
498 dup2mask = strtoul(buf, NULL, 10);
499 unsetenv("RUMPHIJACK__DUP2MASK");
500 }
501 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) {
502 pwdinrump = true;
503 unsetenv("RUMPHIJACK__PWDINRUMP");
504 }
505 }
506
/* XXX: need runtime selection.  low for now due to FD_SETSIZE */
#define HIJACK_FDOFF 128

/*
 * Convert a descriptor number returned by the rump kernel into the
 * number handed to the application.  The error value -1 and
 * descriptors recorded in the dup2 mask pass through unchanged; all
 * others are shifted up by HIJACK_FDOFF.
 */
static int
fd_rump2host(int fd)
{

	if (fd != -1 && !ISDUP2D(fd))
		return fd + HIJACK_FDOFF;

	return fd;
}
521
522 static int
523 fd_host2rump(int fd)
524 {
525
526 if (!ISDUP2D(fd))
527 fd -= HIJACK_FDOFF;
528 return fd;
529 }
530
531 static bool
532 fd_isrump(int fd)
533 {
534
535 return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
536 }
537
538 #define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_FDOFF)
539
540 static bool
541 path_isrump(const char *path)
542 {
543
544 if (rumpprefix == NULL)
545 return false;
546
547 if (*path == '/') {
548 if (strncmp(path, rumpprefix, rumpprefixlen) == 0)
549 return true;
550 return false;
551 } else {
552 return pwdinrump;
553 }
554 }
555
556 static const char *rootpath = "/";
557 static const char *
558 path_host2rump(const char *path)
559 {
560 const char *rv;
561
562 if (*path == '/') {
563 rv = path + rumpprefixlen;
564 if (*rv == '\0')
565 rv = rootpath;
566 } else {
567 rv = path;
568 }
569
570 return rv;
571 }
572
573 static int
574 dodup(int oldd, int minfd)
575 {
576 int (*op_fcntl)(int, int, ...);
577 int newd;
578 int isrump;
579
580 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd));
581 if (fd_isrump(oldd)) {
582 op_fcntl = GETSYSCALL(rump, FCNTL);
583 oldd = fd_host2rump(oldd);
584 isrump = 1;
585 } else {
586 op_fcntl = GETSYSCALL(host, FCNTL);
587 isrump = 0;
588 }
589
590 newd = op_fcntl(oldd, F_DUPFD, minfd);
591
592 if (isrump)
593 newd = fd_rump2host(newd);
594 DPRINTF(("dup <- %d\n", newd));
595
596 return newd;
597 }
598
599 /*
600 * dup a host file descriptor so that it doesn't collide with the dup2mask
601 */
602 static int
603 fd_dupgood(int fd)
604 {
605 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL);
606 int (*op_close)(int) = GETSYSCALL(host, CLOSE);
607 int ofd, i;
608
609 for (i = 1; ISDUP2D(fd); i++) {
610 ofd = fd;
611 fd = op_fcntl(ofd, F_DUPFD, i);
612 op_close(ofd);
613 }
614
615 return fd;
616 }
617
618 int
619 open(const char *path, int flags, ...)
620 {
621 int (*op_open)(const char *, int, ...);
622 bool isrump;
623 va_list ap;
624 int fd;
625
626 if (path_isrump(path)) {
627 path = path_host2rump(path);
628 op_open = GETSYSCALL(rump, OPEN);
629 isrump = true;
630 } else {
631 op_open = GETSYSCALL(host, OPEN);
632 isrump = false;
633 }
634
635 va_start(ap, flags);
636 fd = op_open(path, flags, va_arg(ap, mode_t));
637 va_end(ap);
638
639 if (isrump)
640 fd = fd_rump2host(fd);
641 else
642 fd = fd_dupgood(fd);
643 return fd;
644 }
645
646 int
647 chdir(const char *path)
648 {
649 int (*op_chdir)(const char *);
650 bool isrump;
651 int rv;
652
653 if (path_isrump(path)) {
654 op_chdir = GETSYSCALL(rump, CHDIR);
655 isrump = true;
656 path = path_host2rump(path);
657 } else {
658 op_chdir = GETSYSCALL(host, CHDIR);
659 isrump = false;
660 }
661
662 rv = op_chdir(path);
663 if (rv == 0) {
664 if (isrump)
665 pwdinrump = true;
666 else
667 pwdinrump = false;
668 }
669
670 return rv;
671 }
672
673 int
674 fchdir(int fd)
675 {
676 int (*op_fchdir)(int);
677 bool isrump;
678 int rv;
679
680 if (fd_isrump(fd)) {
681 op_fchdir = GETSYSCALL(rump, FCHDIR);
682 isrump = true;
683 fd = fd_host2rump(fd);
684 } else {
685 op_fchdir = GETSYSCALL(host, FCHDIR);
686 isrump = false;
687 }
688
689 rv = op_fchdir(fd);
690 if (rv == 0) {
691 if (isrump)
692 pwdinrump = true;
693 else
694 pwdinrump = false;
695 }
696
697 return rv;
698 }
699
700 int
701 rename(const char *from, const char *to)
702 {
703 int (*op_rename)(const char *, const char *);
704
705 if (path_isrump(from)) {
706 if (!path_isrump(to))
707 return EXDEV;
708
709 from = path_host2rump(from);
710 to = path_host2rump(to);
711 op_rename = GETSYSCALL(rump, RENAME);
712 } else {
713 if (path_isrump(to))
714 return EXDEV;
715
716 op_rename = GETSYSCALL(host, RENAME);
717 }
718
719 return op_rename(from, to);
720 }
721
722 int __socket30(int, int, int);
723 int
724 __socket30(int domain, int type, int protocol)
725 {
726 int (*op_socket)(int, int, int);
727 int fd;
728 bool isrump;
729
730 isrump = domain < PF_MAX && rumpsockets[domain];
731
732 if (isrump)
733 op_socket = GETSYSCALL(rump, SOCKET);
734 else
735 op_socket = GETSYSCALL(host, SOCKET);
736 fd = op_socket(domain, type, protocol);
737
738 if (isrump)
739 fd = fd_rump2host(fd);
740 else
741 fd = fd_dupgood(fd);
742 DPRINTF(("socket <- %d\n", fd));
743
744 return fd;
745 }
746
747 int
748 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
749 {
750 int (*op_accept)(int, struct sockaddr *, socklen_t *);
751 int fd;
752 bool isrump;
753
754 isrump = fd_isrump(s);
755
756 DPRINTF(("accept -> %d", s));
757 if (isrump) {
758 op_accept = GETSYSCALL(rump, ACCEPT);
759 s = fd_host2rump(s);
760 } else {
761 op_accept = GETSYSCALL(host, ACCEPT);
762 }
763 fd = op_accept(s, addr, addrlen);
764 if (fd != -1 && isrump)
765 fd = fd_rump2host(fd);
766 else
767 fd = fd_dupgood(fd);
768
769 DPRINTF((" <- %d\n", fd));
770
771 return fd;
772 }
773
774 /*
775 * ioctl and fcntl are varargs calls and need special treatment
776 */
777 int
778 ioctl(int fd, unsigned long cmd, ...)
779 {
780 int (*op_ioctl)(int, unsigned long cmd, ...);
781 va_list ap;
782 int rv;
783
784 DPRINTF(("ioctl -> %d\n", fd));
785 if (fd_isrump(fd)) {
786 fd = fd_host2rump(fd);
787 op_ioctl = GETSYSCALL(rump, IOCTL);
788 } else {
789 op_ioctl = GETSYSCALL(host, IOCTL);
790 }
791
792 va_start(ap, cmd);
793 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
794 va_end(ap);
795 return rv;
796 }
797
798 #include <syslog.h>
799 int
800 fcntl(int fd, int cmd, ...)
801 {
802 int (*op_fcntl)(int, int, ...);
803 va_list ap;
804 int rv, minfd, i;
805
806 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd));
807
808 switch (cmd) {
809 case F_DUPFD:
810 va_start(ap, cmd);
811 minfd = va_arg(ap, int);
812 va_end(ap);
813 return dodup(fd, minfd);
814
815 case F_CLOSEM:
816 /*
817 * So, if fd < HIJACKOFF, we want to do a host closem.
818 */
819
820 if (fd < HIJACK_FDOFF) {
821 int closemfd = fd;
822
823 if (rumpclient__closenotify(&closemfd,
824 RUMPCLIENT_CLOSE_FCLOSEM) == -1)
825 return -1;
826 op_fcntl = GETSYSCALL(host, FCNTL);
827 rv = op_fcntl(closemfd, cmd);
828 if (rv)
829 return rv;
830 }
831
832 /*
833 * Additionally, we want to do a rump closem, but only
834 * for the file descriptors not within the dup2mask.
835 */
836
837 /* why don't we offer fls()? */
838 for (i = 15; i >= 0; i--) {
839 if (ISDUP2D(i))
840 break;
841 }
842
843 if (fd >= HIJACK_FDOFF)
844 fd -= HIJACK_FDOFF;
845 else
846 fd = 0;
847 fd = MAX(i+1, fd);
848
849 /* hmm, maybe we should close rump fd's not within dup2mask? */
850
851 return rump_sys_fcntl(fd, F_CLOSEM);
852
853 case F_MAXFD:
854 /*
855 * For maxfd, if there's a rump kernel fd, return
856 * it hostified. Otherwise, return host's MAXFD
857 * return value.
858 */
859 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) {
860 /*
861 * This might go a little wrong in case
862 * of dup2 to [012], but I'm not sure if
863 * there's a justification for tracking
864 * that info. Consider e.g.
865 * dup2(rumpfd, 2) followed by rump_sys_open()
866 * returning 1. We should return 1+HIJACKOFF,
867 * not 2+HIJACKOFF. However, if [01] is not
868 * open, the correct return value is 2.
869 */
870 return fd_rump2host(fd);
871 } else {
872 op_fcntl = GETSYSCALL(host, FCNTL);
873 return op_fcntl(fd, F_MAXFD);
874 }
875 /*NOTREACHED*/
876
877 default:
878 if (fd_isrump(fd)) {
879 fd = fd_host2rump(fd);
880 op_fcntl = GETSYSCALL(rump, FCNTL);
881 } else {
882 op_fcntl = GETSYSCALL(host, FCNTL);
883 }
884
885 va_start(ap, cmd);
886 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
887 va_end(ap);
888 return rv;
889 }
890 /*NOTREACHED*/
891 }
892
893 int
894 close(int fd)
895 {
896 int (*op_close)(int);
897 int rv;
898
899 DPRINTF(("close -> %d\n", fd));
900 if (fd_isrump(fd)) {
901 int undup2 = 0;
902
903 fd = fd_host2rump(fd);
904 if (ISDUP2ALIAS(fd)) {
905 _DIAGASSERT(ISDUP2D(fd));
906 CLRDUP2ALIAS(fd);
907 return 0;
908 }
909
910 if (ISDUP2D(fd))
911 undup2 = 1;
912 op_close = GETSYSCALL(rump, CLOSE);
913 rv = op_close(fd);
914 if (rv == 0 && undup2)
915 CLRDUP2(fd);
916 } else {
917 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1)
918 return -1;
919 op_close = GETSYSCALL(host, CLOSE);
920 rv = op_close(fd);
921 }
922
923 return rv;
924 }
925
926 /*
927 * write cannot issue a standard debug printf due to recursion
928 */
929 ssize_t
930 write(int fd, const void *buf, size_t blen)
931 {
932 ssize_t (*op_write)(int, const void *, size_t);
933
934 if (fd_isrump(fd)) {
935 fd = fd_host2rump(fd);
936 op_write = GETSYSCALL(rump, WRITE);
937 } else {
938 op_write = GETSYSCALL(host, WRITE);
939 }
940
941 return op_write(fd, buf, blen);
942 }
943
944 /*
945 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
946 * many programs do that. dup2 of a rump kernel fd to another value
947 * not >= fdoff is an error.
948 *
949 * Note: cannot rump2host newd, because it is often hardcoded.
950 */
951 int
952 dup2(int oldd, int newd)
953 {
954 int (*host_dup2)(int, int);
955 int rv;
956
957 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
958
959 if (fd_isrump(oldd)) {
960 if (!(newd >= 0 && newd <= 2))
961 return EBADF;
962 oldd = fd_host2rump(oldd);
963 if (oldd == newd) {
964 SETDUP2(newd);
965 SETDUP2ALIAS(newd);
966 return newd;
967 }
968 rv = rump_sys_dup2(oldd, newd);
969 if (rv != -1)
970 SETDUP2(newd);
971 } else {
972 host_dup2 = syscalls[DUALCALL_DUP2].bs_host;
973 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1)
974 return -1;
975 rv = host_dup2(oldd, newd);
976 }
977
978 return rv;
979 }
980
/* dup(2) wrapper: a duplicate with no lower bound on the new number. */
int
dup(int oldd)
{
	int newd;

	newd = dodup(oldd, 0);
	return newd;
}
987
988 pid_t
989 fork()
990 {
991 pid_t rv;
992
993 DPRINTF(("fork\n"));
994
995 rv = rumpclient__dofork(host_fork);
996
997 DPRINTF(("fork returns %d\n", rv));
998 return rv;
999 }
1000 /* we do not have the luxury of not requiring a stackframe */
1001 __strong_alias(__vfork14,fork);
1002
/*
 * daemon(3) wrapper: brackets the host daemon() with librumpclient's
 * prefork / fork_init pair.  Returns 0 on success and -1 as soon as
 * any of the three steps fails.
 */
int
daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *forkstate;

	forkstate = rumpclient_prefork();
	if (forkstate == NULL ||
	    host_daemon(nochdir, noclose) == -1 ||
	    rumpclient_fork_init(forkstate) == -1)
		return -1;

	return 0;
}
1019
1020 int
1021 execve(const char *path, char *const argv[], char *const envp[])
1022 {
1023 char buf[128];
1024 char *dup2str;
1025 const char *pwdinrumpstr;
1026 char **newenv;
1027 size_t nelem;
1028 int rv, sverrno;
1029 int bonus = 1, i = 0;
1030
1031 if (dup2mask) {
1032 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2MASK=%u", dup2mask);
1033 dup2str = malloc(strlen(buf)+1);
1034 if (dup2str == NULL)
1035 return ENOMEM;
1036 strcpy(dup2str, buf);
1037 bonus++;
1038 } else {
1039 dup2str = NULL;
1040 }
1041
1042 if (pwdinrump) {
1043 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true";
1044 bonus++;
1045 } else {
1046 pwdinrumpstr = NULL;
1047 }
1048
1049 for (nelem = 0; envp && envp[nelem]; nelem++)
1050 continue;
1051 newenv = malloc(sizeof(*newenv) * nelem+bonus);
1052 if (newenv == NULL) {
1053 free(dup2str);
1054 return ENOMEM;
1055 }
1056 memcpy(newenv, envp, nelem*sizeof(*newenv));
1057 if (dup2str) {
1058 newenv[nelem+i] = dup2str;
1059 i++;
1060 }
1061 if (pwdinrumpstr) {
1062 newenv[nelem+i] = __UNCONST(pwdinrumpstr);
1063 i++;
1064 }
1065 newenv[nelem+i] = NULL;
1066 _DIAGASSERT(i < bonus);
1067
1068 rv = rumpclient_exec(path, argv, newenv);
1069
1070 _DIAGASSERT(rv != 0);
1071 sverrno = errno;
1072 free(newenv);
1073 free(dup2str);
1074 errno = sverrno;
1075 return rv;
1076 }
1077
1078 /*
1079 * select is done by calling poll.
1080 */
1081 int
1082 REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
1083 struct timeval *timeout)
1084 {
1085 struct pollfd *pfds;
1086 struct timespec ts, *tsp = NULL;
1087 nfds_t realnfds;
1088 int i, j;
1089 int rv, incr;
1090
1091 DPRINTF(("select\n"));
1092
1093 /*
1094 * Well, first we must scan the fds to figure out how many
1095 * fds there really are. This is because up to and including
1096 * nb5 poll() silently refuses nfds > process_maxopen_fds.
1097 * Seems to be fixed in current, thank the maker.
1098 * god damn cluster...bomb.
1099 */
1100
1101 for (i = 0, realnfds = 0; i < nfds; i++) {
1102 if (readfds && FD_ISSET(i, readfds)) {
1103 realnfds++;
1104 continue;
1105 }
1106 if (writefds && FD_ISSET(i, writefds)) {
1107 realnfds++;
1108 continue;
1109 }
1110 if (exceptfds && FD_ISSET(i, exceptfds)) {
1111 realnfds++;
1112 continue;
1113 }
1114 }
1115
1116 if (realnfds) {
1117 pfds = calloc(realnfds, sizeof(*pfds));
1118 if (!pfds)
1119 return -1;
1120 } else {
1121 pfds = NULL;
1122 }
1123
1124 for (i = 0, j = 0; i < nfds; i++) {
1125 incr = 0;
1126 if (readfds && FD_ISSET(i, readfds)) {
1127 pfds[j].fd = i;
1128 pfds[j].events |= POLLIN;
1129 incr=1;
1130 }
1131 if (writefds && FD_ISSET(i, writefds)) {
1132 pfds[j].fd = i;
1133 pfds[j].events |= POLLOUT;
1134 incr=1;
1135 }
1136 if (exceptfds && FD_ISSET(i, exceptfds)) {
1137 pfds[j].fd = i;
1138 pfds[j].events |= POLLHUP|POLLERR;
1139 incr=1;
1140 }
1141 if (incr)
1142 j++;
1143 }
1144 assert(j == (int)realnfds);
1145
1146 if (timeout) {
1147 TIMEVAL_TO_TIMESPEC(timeout, &ts);
1148 tsp = &ts;
1149 }
1150 rv = REALPOLLTS(pfds, realnfds, tsp, NULL);
1151 /*
1152 * "If select() returns with an error the descriptor sets
1153 * will be unmodified"
1154 */
1155 if (rv < 0)
1156 goto out;
1157
1158 /*
1159 * zero out results (can't use FD_ZERO for the
1160 * obvious select-me-not reason). whee.
1161 *
1162 * We do this here since some software ignores the return
1163 * value of select, and hence if the timeout expires, it may
1164 * assume all input descriptors have activity.
1165 */
1166 for (i = 0; i < nfds; i++) {
1167 if (readfds)
1168 FD_CLR(i, readfds);
1169 if (writefds)
1170 FD_CLR(i, writefds);
1171 if (exceptfds)
1172 FD_CLR(i, exceptfds);
1173 }
1174 if (rv == 0)
1175 goto out;
1176
1177 /*
1178 * We have >0 fds with activity. Harvest the results.
1179 */
1180 for (i = 0; i < (int)realnfds; i++) {
1181 if (readfds) {
1182 if (pfds[i].revents & POLLIN) {
1183 FD_SET(pfds[i].fd, readfds);
1184 }
1185 }
1186 if (writefds) {
1187 if (pfds[i].revents & POLLOUT) {
1188 FD_SET(pfds[i].fd, writefds);
1189 }
1190 }
1191 if (exceptfds) {
1192 if (pfds[i].revents & (POLLHUP|POLLERR)) {
1193 FD_SET(pfds[i].fd, exceptfds);
1194 }
1195 }
1196 }
1197
1198 out:
1199 free(pfds);
1200 return rv;
1201 }
1202
/*
 * Count how many entries of a pollfd vector refer to host descriptors
 * and how many to rump descriptors, adding to the two counters passed
 * in.  Entries with fd -1 are not counted.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t n = 0;
	int *counter;

	while (n < nfds) {
		if (fds[n].fd != -1) {
			counter = fd_isrump(fds[n].fd) ? rumpcall : hostcall;
			(*counter)++;
		}
		n++;
	}
}
1218
/*
 * Rewrite the descriptor number of every entry in a pollfd vector by
 * passing it through the conversion function fdadj.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	nfds_t n = 0;

	while (n < nfds) {
		fds[n].fd = fdadj(fds[n].fd);
		n++;
	}
}
1228
1229 /*
1230 * poll is easy as long as the call comes in the fds only in one
1231 * kernel. otherwise its quite tricky...
1232 */
1233 struct pollarg {
1234 struct pollfd *pfds;
1235 nfds_t nfds;
1236 const struct timespec *ts;
1237 const sigset_t *sigmask;
1238 int pipefd;
1239 int errnum;
1240 };
1241
1242 static void *
1243 hostpoll(void *arg)
1244 {
1245 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
1246 const sigset_t *);
1247 struct pollarg *parg = arg;
1248 intptr_t rv;
1249
1250 op_pollts = GETSYSCALL(host, POLLTS);
1251 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
1252 if (rv == -1)
1253 parg->errnum = errno;
1254 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
1255
1256 return (void *)(intptr_t)rv;
1257 }
1258
1259 int
1260 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
1261 const sigset_t *sigmask)
1262 {
1263 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
1264 const sigset_t *);
1265 int (*host_close)(int);
1266 int hostcall = 0, rumpcall = 0;
1267 pthread_t pt;
1268 nfds_t i;
1269 int rv;
1270
1271 DPRINTF(("poll\n"));
1272 checkpoll(fds, nfds, &hostcall, &rumpcall);
1273
1274 if (hostcall && rumpcall) {
1275 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
1276 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
1277 struct pollarg parg;
1278 uintptr_t lrv;
1279 int sverrno = 0, trv;
1280
1281 /*
1282 * ok, this is where it gets tricky. We must support
1283 * this since it's a very common operation in certain
1284 * types of software (telnet, netcat, etc). We allocate
1285 * two vectors and run two poll commands in separate
1286 * threads. Whichever returns first "wins" and the
1287 * other kernel's fds won't show activity.
1288 */
1289 rv = -1;
1290
1291 /* allocate full vector for O(n) joining after call */
1292 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
1293 if (!pfd_host)
1294 goto out;
1295 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
1296 if (!pfd_rump) {
1297 goto out;
1298 }
1299
1300 /* split vectors */
1301 for (i = 0; i < nfds; i++) {
1302 if (fds[i].fd == -1) {
1303 pfd_host[i].fd = -1;
1304 pfd_rump[i].fd = -1;
1305 } else if (fd_isrump(fds[i].fd)) {
1306 pfd_host[i].fd = -1;
1307 pfd_rump[i].fd = fd_host2rump(fds[i].fd);
1308 pfd_rump[i].events = fds[i].events;
1309 } else {
1310 pfd_rump[i].fd = -1;
1311 pfd_host[i].fd = fds[i].fd;
1312 pfd_host[i].events = fds[i].events;
1313 }
1314 pfd_rump[i].revents = pfd_host[i].revents = 0;
1315 fds[i].revents = 0;
1316 }
1317
1318 /*
1319 * then, open two pipes, one for notifications
1320 * to each kernel.
1321 */
1322 if (rump_sys_pipe(rpipe) == -1)
1323 goto out;
1324 if (pipe(hpipe) == -1)
1325 goto out;
1326
1327 pfd_host[nfds].fd = hpipe[0];
1328 pfd_host[nfds].events = POLLIN;
1329 pfd_rump[nfds].fd = rpipe[0];
1330 pfd_rump[nfds].events = POLLIN;
1331
1332 /*
1333 * then, create a thread to do host part and meanwhile
1334 * do rump kernel part right here
1335 */
1336
1337 parg.pfds = pfd_host;
1338 parg.nfds = nfds+1;
1339 parg.ts = ts;
1340 parg.sigmask = sigmask;
1341 parg.pipefd = rpipe[1];
1342 pthread_create(&pt, NULL, hostpoll, &parg);
1343
1344 op_pollts = GETSYSCALL(rump, POLLTS);
1345 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
1346 sverrno = errno;
1347 write(hpipe[1], &rv, sizeof(rv));
1348 pthread_join(pt, (void *)&trv);
1349
1350 /* check who "won" and merge results */
1351 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
1352 rv = trv;
1353
1354 for (i = 0; i < nfds; i++) {
1355 if (pfd_rump[i].fd != -1)
1356 fds[i].revents = pfd_rump[i].revents;
1357 }
1358 sverrno = parg.errnum;
1359 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
1360 rv = trv;
1361
1362 for (i = 0; i < nfds; i++) {
1363 if (pfd_host[i].fd != -1)
1364 fds[i].revents = pfd_host[i].revents;
1365 }
1366 } else {
1367 rv = 0;
1368 }
1369
1370 out:
1371 host_close = GETSYSCALL(host, CLOSE);
1372 if (rpipe[0] != -1)
1373 rump_sys_close(rpipe[0]);
1374 if (rpipe[1] != -1)
1375 rump_sys_close(rpipe[1]);
1376 if (hpipe[0] != -1)
1377 host_close(hpipe[0]);
1378 if (hpipe[1] != -1)
1379 host_close(hpipe[1]);
1380 free(pfd_host);
1381 free(pfd_rump);
1382 errno = sverrno;
1383 } else {
1384 if (hostcall) {
1385 op_pollts = GETSYSCALL(host, POLLTS);
1386 } else {
1387 op_pollts = GETSYSCALL(rump, POLLTS);
1388 adjustpoll(fds, nfds, fd_host2rump);
1389 }
1390
1391 rv = op_pollts(fds, nfds, ts, sigmask);
1392 if (rumpcall)
1393 adjustpoll(fds, nfds, fd_rump2host);
1394 }
1395
1396 return rv;
1397 }
1398
1399 int
1400 poll(struct pollfd *fds, nfds_t nfds, int timeout)
1401 {
1402 struct timespec ts;
1403 struct timespec *tsp = NULL;
1404
1405 if (timeout != INFTIM) {
1406 ts.tv_sec = timeout / 1000;
1407 ts.tv_nsec = (timeout % 1000) * 1000*1000;
1408
1409 tsp = &ts;
1410 }
1411
1412 return REALPOLLTS(fds, nfds, tsp, NULL);
1413 }
1414
1415 int
1416 REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges,
1417 struct kevent *eventlist, size_t nevents,
1418 const struct timespec *timeout)
1419 {
1420 int (*op_kevent)(int, const struct kevent *, size_t,
1421 struct kevent *, size_t, const struct timespec *);
1422 const struct kevent *ev;
1423 size_t i;
1424
1425 /*
1426 * Check that we don't attempt to kevent rump kernel fd's.
1427 * That needs similar treatment to select/poll, but is slightly
1428 * trickier since we need to manage to different kq descriptors.
1429 * (TODO, in case you're wondering).
1430 */
1431 for (i = 0; i < nchanges; i++) {
1432 ev = &changelist[i];
1433 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE ||
1434 ev->filter == EVFILT_VNODE) {
1435 if (fd_isrump((int)ev->ident))
1436 return ENOTSUP;
1437 }
1438 }
1439
1440 op_kevent = GETSYSCALL(host, KEVENT);
1441 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout);
1442 }
1443
/*
 * Rest are std type calls.
 *
 * Each FDCALL() invocation below supplies, in order: the return type,
 * the name of the function being defined, the DUALCALL_* selector for
 * it, the full parameter list, the matching prototype (types only) and
 * the argument list used for forwarding.
 *
 * NOTE(review): FDCALL is defined outside this part of the file.
 * Presumably it generates a wrapper which picks the host or rump
 * kernel based on the fd argument -- confirm against the macro before
 * renaming any parameter, since the macro body may refer to them.
 */

/* socket naming and connection setup */
FDCALL(int, bind, DUALCALL_BIND, \
	(int fd, const struct sockaddr *name, socklen_t namelen), \
	(int, const struct sockaddr *, socklen_t), \
	(fd, name, namelen))

FDCALL(int, connect, DUALCALL_CONNECT, \
	(int fd, const struct sockaddr *name, socklen_t namelen), \
	(int, const struct sockaddr *, socklen_t), \
	(fd, name, namelen))

FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \
	(int fd, struct sockaddr *name, socklen_t *namelen), \
	(int, struct sockaddr *, socklen_t *), \
	(fd, name, namelen))

FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \
	(int fd, struct sockaddr *name, socklen_t *namelen), \
	(int, struct sockaddr *, socklen_t *), \
	(fd, name, namelen))

FDCALL(int, listen, DUALCALL_LISTEN, \
	(int fd, int backlog), \
	(int, int), \
	(fd, backlog))

/* socket data transfer */
FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \
	(int fd, void *buf, size_t len, int flags, \
	struct sockaddr *from, socklen_t *fromlen), \
	(int, void *, size_t, int, struct sockaddr *, socklen_t *), \
	(fd, buf, len, flags, from, fromlen))

FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \
	(int fd, const void *buf, size_t len, int flags, \
	const struct sockaddr *to, socklen_t tolen), \
	(int, const void *, size_t, int, \
	const struct sockaddr *, socklen_t), \
	(fd, buf, len, flags, to, tolen))

FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG, \
	(int fd, struct msghdr *msg, int flags), \
	(int, struct msghdr *, int), \
	(fd, msg, flags))

FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG, \
	(int fd, const struct msghdr *msg, int flags), \
	(int, const struct msghdr *, int), \
	(fd, msg, flags))

/* socket options and teardown */
FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \
	(int fd, int level, int optn, void *optval, socklen_t *optlen), \
	(int, int, int, void *, socklen_t *), \
	(fd, level, optn, optval, optlen))

FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \
	(int fd, int level, int optn, \
	const void *optval, socklen_t optlen), \
	(int, int, int, const void *, socklen_t), \
	(fd, level, optn, optval, optlen))

FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \
	(int fd, int how), \
	(int, int), \
	(fd, how))
1511
/*
 * STUB(fun) selects the symbol name used for a function definition.
 * With _FORTIFY_SOURCE > 0 it expands to __ssp_weak_name(fun), which
 * this file defines near its top as _hijack_ ## fun; otherwise it is
 * plain fun.
 *
 * In the _FORTIFY_SOURCE case, _hijack_readlink and _hijack_getcwd are
 * additionally defined here as thin wrappers which pass their arguments
 * straight to _sys_readlink() and _sys_getcwd() and return the result.
 * NOTE(review): _sys_readlink/_sys_getcwd are only declared here;
 * presumably they are libc's internal entry points -- confirm.
 */
#if _FORTIFY_SOURCE > 0
#define STUB(fun) __ssp_weak_name(fun)
ssize_t _sys_readlink(const char * __restrict, char * __restrict, size_t);
ssize_t
STUB(readlink)(const char * __restrict path, char * __restrict buf,
	size_t bufsiz)
{
	return _sys_readlink(path, buf, bufsiz);
}

char *_sys_getcwd(char *, size_t);
char *
STUB(getcwd)(char *buf, size_t size)
{
	return _sys_getcwd(buf, size);
}
#else
#define STUB(fun) fun
#endif
1531
/*
 * File descriptor calls for I/O, metadata and synchronization.
 * Names spelled REALxxx are macros defined outside this part of the
 * file.  NOTE(review): presumably they map to the versioned symbol
 * name of the respective call -- confirm against their definitions.
 */

/* data transfer */
FDCALL(ssize_t, REALREAD, DUALCALL_READ, \
	(int fd, void *buf, size_t buflen), \
	(int, void *, size_t), \
	(fd, buf, buflen))

FDCALL(ssize_t, readv, DUALCALL_READV, \
	(int fd, const struct iovec *iov, int iovcnt), \
	(int, const struct iovec *, int), \
	(fd, iov, iovcnt))

FDCALL(ssize_t, writev, DUALCALL_WRITEV, \
	(int fd, const struct iovec *iov, int iovcnt), \
	(int, const struct iovec *, int), \
	(fd, iov, iovcnt))

/* status queries, positioning and directory reading */
FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \
	(int fd, struct stat *sb), \
	(int, struct stat *), \
	(fd, sb))

FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1, \
	(int fd, struct statvfs *buf, int flags), \
	(int, struct statvfs *, int), \
	(fd, buf, flags))

FDCALL(off_t, REALLSEEK, DUALCALL_LSEEK, \
	(int fd, off_t offset, int whence), \
	(int, off_t, int), \
	(fd, offset, whence))

FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \
	(int fd, char *buf, size_t nbytes), \
	(int, char *, size_t), \
	(fd, buf, nbytes))

/* attribute and size changes */
FDCALL(int, fchown, DUALCALL_FCHOWN, \
	(int fd, uid_t owner, gid_t group), \
	(int, uid_t, gid_t), \
	(fd, owner, group))

FDCALL(int, fchmod, DUALCALL_FCHMOD, \
	(int fd, mode_t mode), \
	(int, mode_t), \
	(fd, mode))

FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \
	(int fd, off_t length), \
	(int, off_t), \
	(fd, length))

/* flushing to stable storage */
FDCALL(int, fsync, DUALCALL_FSYNC, \
	(int fd), \
	(int), \
	(fd))

FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \
	(int fd, int how, off_t start, off_t length), \
	(int, int, off_t, off_t), \
	(fd, how, start, length))

/* timestamps */
FDCALL(int, futimes, DUALCALL_FUTIMES, \
	(int fd, const struct timeval *tv), \
	(int, const struct timeval *), \
	(fd, tv))
1596
/*
 * path-based selectors
 *
 * Each PATHCALL() invocation takes the same six arguments as the
 * fd-based calls: return type, function name, DUALCALL_* selector,
 * parameter list, prototype and forwarding argument list.
 *
 * NOTE(review): PATHCALL is defined outside this part of the file.
 * Presumably the kernel is selected by examining the parameter named
 * "path" -- confirm against the macro before renaming parameters.
 */

/* status queries */
PATHCALL(int, REALSTAT, DUALCALL_STAT, \
	(const char *path, struct stat *sb), \
	(const char *, struct stat *), \
	(path, sb))

PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \
	(const char *path, struct stat *sb), \
	(const char *, struct stat *), \
	(path, sb))

/* ownership and mode */
PATHCALL(int, chown, DUALCALL_CHOWN, \
	(const char *path, uid_t owner, gid_t group), \
	(const char *, uid_t, gid_t), \
	(path, owner, group))

PATHCALL(int, lchown, DUALCALL_LCHOWN, \
	(const char *path, uid_t owner, gid_t group), \
	(const char *, uid_t, gid_t), \
	(path, owner, group))

PATHCALL(int, chmod, DUALCALL_CHMOD, \
	(const char *path, mode_t mode), \
	(const char *, mode_t), \
	(path, mode))

PATHCALL(int, lchmod, DUALCALL_LCHMOD, \
	(const char *path, mode_t mode), \
	(const char *, mode_t), \
	(path, mode))

PATHCALL(int, statvfs1, DUALCALL_STATVFS1, \
	(const char *path, struct statvfs *buf, int flags), \
	(const char *, struct statvfs *, int), \
	(path, buf, flags))

/* namespace manipulation */
PATHCALL(int, unlink, DUALCALL_UNLINK, \
	(const char *path), \
	(const char *), \
	(path))

/* note: the first symlink parameter is the one named "path" here */
PATHCALL(int, symlink, DUALCALL_SYMLINK, \
	(const char *path, const char *target), \
	(const char *, const char *), \
	(path, target))

PATHCALL(ssize_t, readlink, DUALCALL_READLINK, \
	(const char *path, char *buf, size_t bufsiz), \
	(const char *, char *, size_t), \
	(path, buf, bufsiz))

PATHCALL(int, mkdir, DUALCALL_MKDIR, \
	(const char *path, mode_t mode), \
	(const char *, mode_t), \
	(path, mode))

PATHCALL(int, rmdir, DUALCALL_RMDIR, \
	(const char *path), \
	(const char *), \
	(path))

/* timestamps and size */
PATHCALL(int, utimes, DUALCALL_UTIMES, \
	(const char *path, const struct timeval *tv), \
	(const char *, const struct timeval *), \
	(path, tv))

PATHCALL(int, lutimes, DUALCALL_LUTIMES, \
	(const char *path, const struct timeval *tv), \
	(const char *, const struct timeval *), \
	(path, tv))

PATHCALL(int, truncate, DUALCALL_TRUNCATE, \
	(const char *path, off_t length), \
	(const char *, off_t), \
	(path, length))

/*
 * Note: with mount the decisive parameter is the mount
 * destination directory.  This is because we don't really know
 * about the "source" directory in a generic call (and besides,
 * it might not even exist, cf. nfs).
 */
PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \
	(const char *type, const char *path, int flags, \
	void *data, size_t dlen), \
	(const char *, const char *, int, void *, size_t), \
	(type, path, flags, data, dlen))

PATHCALL(int, unmount, DUALCALL_UNMOUNT, \
	(const char *path, int flags), \
	(const char *, int), \
	(path, flags))
1692