hijack.c revision 1.104 1 /* $NetBSD: hijack.c,v 1.104 2013/07/27 17:37:29 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include "rumpuser_port.h"
29
30 #if !defined(lint)
31 __RCSID("$NetBSD: hijack.c,v 1.104 2013/07/27 17:37:29 pooka Exp $");
32 #endif
33
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/mount.h>
39 #include <sys/poll.h>
40 #include <sys/socket.h>
41 #include <sys/stat.h>
42 #include <sys/statvfs.h>
43 #include <sys/time.h>
44 #include <sys/uio.h>
45
46 #ifdef PLATFORM_HAS_KQUEUE
47 #include <sys/event.h>
48 #endif
49
50 #ifdef PLATFORM_HAS_NBQUOTA
51 #include <sys/quotactl.h>
52 #endif
53
54 #include <assert.h>
55 #include <dlfcn.h>
56 #include <err.h>
57 #include <errno.h>
58 #include <fcntl.h>
59 #include <poll.h>
60 #include <pthread.h>
61 #include <signal.h>
62 #include <stdarg.h>
63 #include <stdbool.h>
64 #include <stdint.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <string.h>
68 #include <time.h>
69 #include <unistd.h>
70
71 #include <rump/rumpclient.h>
72 #include <rump/rump_syscalls.h>
73
74 #include "hijack.h"
75
/*
 * Identifiers for every call that exists both on the host and in the
 * rump kernel.  Each value indexes syscalls[] and is looked up in
 * syscnames[] by rcinit(); DUALCALL__NUM is the number of entries.
 *
 * XXX: Consider autogenerating this, syscnames[] and syscalls[] with
 * a DSL where the tool also checks the symbols exported by this library
 * to make sure all relevant calls are accounted for.
 */
enum dualcall {
	/* writes and descriptor control */
	DUALCALL_WRITE, DUALCALL_WRITEV,
	DUALCALL_PWRITE, DUALCALL_PWRITEV,
	DUALCALL_IOCTL, DUALCALL_FCNTL,

	/* sockets */
	DUALCALL_SOCKET, DUALCALL_ACCEPT,
	DUALCALL_BIND, DUALCALL_CONNECT,
	DUALCALL_GETPEERNAME, DUALCALL_GETSOCKNAME,
	DUALCALL_LISTEN,
	DUALCALL_RECVFROM, DUALCALL_RECVMSG,
	DUALCALL_SENDTO, DUALCALL_SENDMSG,
	DUALCALL_GETSOCKOPT, DUALCALL_SETSOCKOPT,
	DUALCALL_SHUTDOWN,

	/* reads, dup, close, poll */
	DUALCALL_READ, DUALCALL_READV,
	DUALCALL_PREAD, DUALCALL_PREADV,
	DUALCALL_DUP2,
	DUALCALL_CLOSE,
	DUALCALL_POLLTS,

#ifndef __linux__
	DUALCALL_STAT, DUALCALL_LSTAT, DUALCALL_FSTAT,
#endif

	/* file system calls */
	DUALCALL_CHMOD, DUALCALL_LCHMOD, DUALCALL_FCHMOD,
	DUALCALL_CHOWN, DUALCALL_LCHOWN, DUALCALL_FCHOWN,
	DUALCALL_OPEN,
	DUALCALL_CHDIR, DUALCALL_FCHDIR,
	DUALCALL_LSEEK,
	DUALCALL_UNLINK, DUALCALL_SYMLINK, DUALCALL_READLINK,
	DUALCALL_LINK, DUALCALL_RENAME,
	DUALCALL_MKDIR, DUALCALL_RMDIR,
	DUALCALL_UTIMES, DUALCALL_LUTIMES, DUALCALL_FUTIMES,
	DUALCALL_TRUNCATE, DUALCALL_FTRUNCATE,
	DUALCALL_FSYNC,
	DUALCALL_ACCESS,

#ifndef __linux__
	DUALCALL___GETCWD,
	DUALCALL_GETDENTS,
#endif

#ifndef __linux__
	DUALCALL_MKNOD,
#endif

#ifdef PLATFORM_HAS_NBFILEHANDLE
	DUALCALL_GETFH, DUALCALL_FHOPEN,
	DUALCALL_FHSTAT, DUALCALL_FHSTATVFS1,
#endif

#ifdef PLATFORM_HAS_KQUEUE
	DUALCALL_KEVENT,
#endif

#ifdef PLATFORM_HAS_NBSYSCTL
	DUALCALL___SYSCTL,
#endif

#ifdef PLATFORM_HAS_NFSSVC
	DUALCALL_NFSSVC,
#endif

#ifdef PLATFORM_HAS_NBVFSSTAT
	DUALCALL_STATVFS1, DUALCALL_FSTATVFS1, DUALCALL_GETVFSSTAT,
#endif

#ifdef PLATFORM_HAS_NBMOUNT
	DUALCALL_MOUNT, DUALCALL_UNMOUNT,
#endif

#ifdef PLATFORM_HAS_FSYNC_RANGE
	DUALCALL_FSYNC_RANGE,
#endif

#ifdef PLATFORM_HAS_CHFLAGS
	DUALCALL_CHFLAGS, DUALCALL_LCHFLAGS, DUALCALL_FCHFLAGS,
#endif

#ifdef PLATFORM_HAS_NBQUOTA
	DUALCALL_QUOTACTL,
#endif

	/* number of entries above; must stay last */
	DUALCALL__NUM
};
158
/*
 * RSYS_NAME(X) expands to the string form of RUMP_SYS_RENAME_X.  The
 * extra RSYS_STRING level makes sure the concatenated macro is expanded
 * before it is stringified.
 */
#define RSYS_STRING(a)	__STRING(a)
#define RSYS_NAME(a)	RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
161
/*
 * Map the REALxxx names to the symbol each host platform exports for
 * the call in question.
 *
 * Would be nice to get this automatically in sync with libc.
 * Also, this does not work for compat-using binaries (we should
 * provide all previous interfaces, not just the current ones)
 */
#if defined(__NetBSD__)

#if !__NetBSD_Prereq__(5,99,7)
#define REALSELECT	select
#define REALPOLLTS	pollts
#define REALKEVENT	kevent
#define REALSTAT	__stat30
#define REALLSTAT	__lstat30
#define REALFSTAT	__fstat30
#define REALUTIMES	utimes
#define REALLUTIMES	lutimes
#define REALFUTIMES	futimes
#define REALMKNOD	mknod
#define REALFHSTAT	__fhstat40
#else /* >= 5.99.7 */
#define REALSELECT	_sys___select50
#define REALPOLLTS	_sys___pollts50
#define REALKEVENT	_sys___kevent50
#define REALSTAT	__stat50
#define REALLSTAT	__lstat50
#define REALFSTAT	__fstat50
#define REALUTIMES	__utimes50
#define REALLUTIMES	__lutimes50
#define REALFUTIMES	__futimes50
#define REALMKNOD	__mknod50
#define REALFHSTAT	__fhstat50
#endif /* < 5.99.7 */

/* names that do not depend on the NetBSD version */
#define REALREAD	_sys_read
#define REALPREAD	_sys_pread
#define REALPWRITE	_sys_pwrite
#define REALGETDENTS	__getdents30
#define REALMOUNT	__mount50
#define REALGETFH	__getfh30
#define REALFHOPEN	__fhopen40
#define REALFHSTATVFS1	__fhstatvfs140
#define OLDREALQUOTACTL	__quotactl50 /* 5.99.48-62 only */
#define REALSOCKET	__socket30

#define LSEEK_ALIAS	_lseek
#define VFORK		__vfork14

/* prototypes for the NetBSD-only renamed symbols */
int REALSTAT(const char *, struct stat *);
int REALLSTAT(const char *, struct stat *);
int REALFSTAT(int, struct stat *);
int REALMKNOD(const char *, mode_t, dev_t);
int REALGETDENTS(int, char *, size_t);

int __getcwd(char *, size_t);

#elif defined(__linux__) /* glibc, really */

#define REALREAD	read
#define REALPREAD	pread
#define REALPWRITE	pwrite
#define REALSELECT	select
#define REALPOLLTS	ppoll
#define REALUTIMES	utimes
#define REALLUTIMES	lutimes
#define REALFUTIMES	futimes
#define REALFHSTAT	fhstat
#define REALSOCKET	socket

#else /* !NetBSD && !linux */

#error platform not supported

#endif /* platform */
235
/*
 * Prototypes for the host entry points named by the REALxxx macros.
 */
int	REALSELECT(int nfds, fd_set *readfds, fd_set *writefds,
	    fd_set *exceptfds, struct timeval *timeout);
int	REALPOLLTS(struct pollfd *fds, nfds_t nfds,
	    const struct timespec *ts, const sigset_t *sigmask);
int	REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges,
	    struct kevent *eventlist, size_t nevents,
	    const struct timespec *timeout);
ssize_t	REALREAD(int fd, void *buf, size_t blen);
ssize_t	REALPREAD(int fd, void *buf, size_t blen, off_t off);
ssize_t	REALPWRITE(int fd, const void *buf, size_t blen, off_t off);
int	REALUTIMES(const char *path, const struct timeval tv[2]);
int	REALLUTIMES(const char *path, const struct timeval tv[2]);
int	REALFUTIMES(int fd, const struct timeval tv[2]);
int	REALMOUNT(const char *type, const char *dir, int flags,
	    void *data, size_t dlen);
int	REALGETFH(const char *path, void *fhp, size_t *fh_size);
int	REALFHOPEN(const void *fhp, size_t fh_size, int flags);
int	REALFHSTAT(const void *fhp, size_t fh_size, struct stat *sb);
int	REALFHSTATVFS1(const void *fhp, size_t fh_size, struct statvfs *sb,
	    int flags);
int	REALSOCKET(int domain, int type, int protocol);

#ifdef PLATFORM_HAS_NBQUOTA
int	OLDREALQUOTACTL(const char *path, struct plistref *pref);
#endif
257
258 #define S(a) __STRING(a)
259 struct sysnames {
260 enum dualcall scm_callnum;
261 const char *scm_hostname;
262 const char *scm_rumpname;
263 } syscnames[] = {
264 { DUALCALL_SOCKET, S(REALSOCKET), RSYS_NAME(SOCKET) },
265 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) },
266 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) },
267 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) },
268 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) },
269 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) },
270 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) },
271 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) },
272 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) },
273 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) },
274 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) },
275 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) },
276 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) },
277 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) },
278 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) },
279 { DUALCALL_READV, "readv", RSYS_NAME(READV) },
280 { DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) },
281 { DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) },
282 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) },
283 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) },
284 { DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) },
285 { DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) },
286 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) },
287 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) },
288 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) },
289 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) },
290 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) },
291 #ifndef __linux__
292 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) },
293 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) },
294 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) },
295 #endif
296 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) },
297 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) },
298 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) },
299 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) },
300 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) },
301 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) },
302 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) },
303 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) },
304 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) },
305 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) },
306 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) },
307 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) },
308 { DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) },
309 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) },
310 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) },
311 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) },
312 { DUALCALL_LINK, "link", RSYS_NAME(LINK) },
313 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) },
314 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) },
315 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) },
316 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) },
317 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) },
318 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) },
319 { DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) },
320
321 #ifndef __linux__
322 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) },
323 { DUALCALL_GETDENTS, S(REALGETDENTS),RSYS_NAME(GETDENTS) },
324 #endif
325
326 #ifndef __linux__
327 { DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) },
328 #endif
329
330 #ifdef PLATFORM_HAS_NBFILEHANDLE
331 { DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) },
332 { DUALCALL_FHOPEN, S(REALFHOPEN), RSYS_NAME(FHOPEN) },
333 { DUALCALL_FHSTAT, S(REALFHSTAT), RSYS_NAME(FHSTAT) },
334 { DUALCALL_FHSTATVFS1, S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) },
335 #endif
336
337 #ifdef PLATFORM_HAS_KQUEUE
338 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) },
339 #endif
340
341 #ifdef PLATFORM_HAS_NBSYSCTL
342 { DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) },
343 #endif
344
345 #ifdef PLATFORM_HAS_NFSSVC
346 { DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) },
347 #endif
348
349 #ifdef PLATFORM_HAS_NBVFSSTAT
350 { DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) },
351 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) },
352 { DUALCALL_GETVFSSTAT, "getvfsstat", RSYS_NAME(GETVFSSTAT) },
353 #endif
354
355 #ifdef PLATFORM_HAS_NBMOUNT
356 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) },
357 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) },
358 #endif
359
360 #ifdef PLATFORM_HAS_FSYNC_RANGE
361 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) },
362 #endif
363
364 #ifdef PLATFORM_HAS_CHFLAGS
365 { DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) },
366 { DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) },
367 { DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) },
368 #endif /* PLATFORM_HAS_CHFLAGS */
369
370 #ifdef PLATFORM_HAS_NBQUOTA
371 #if __NetBSD_Prereq__(5,99,63)
372 { DUALCALL_QUOTACTL, "__quotactl", RSYS_NAME(__QUOTACTL) },
373 #elif __NetBSD_Prereq__(5,99,48)
374 { DUALCALL_QUOTACTL, S(OLDREALQUOTACTL),RSYS_NAME(QUOTACTL) },
375 #endif
376 #endif /* PLATFORM_HAS_NBQUOTA */
377
378 };
379 #undef S
380
/*
 * Resolved entry points, indexed by enum dualcall.  Filled in by
 * rcinit().  GETSYSCALL(host, OPEN) / GETSYSCALL(rump, OPEN) pick the
 * host or rump kernel pointer for a given call.
 */
struct bothsys {
	void *bs_host;	/* host implementation */
	void *bs_rump;	/* rump kernel implementation */
} syscalls[DUALCALL__NUM];
#define GETSYSCALL(which, name)	syscalls[DUALCALL_##name].bs_##which
386
/* host entry points resolved separately from the syscalls[] table */
static pid_t	(*host_fork)(void);
static int	(*host_daemon)(int, int);
static void *	(*host_mmap)(void *, size_t, int, int, int, off_t);

/*
 * This tracks if our process is in a subdirectory of /rump.
 * It's preserved over exec.
 */
static bool pwdinrump;

/* result of classifying a path; PATH_HOST must remain the zero value */
enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET };

static bool		fd_isrump(int);
static enum pathtype	path_isrump(const char *);

/* default FD_SETSIZE is 256 ==> default fdoff is 128 */
static int hijack_fdoff = FD_SETSIZE/2;
404
/*
 * Maintain a mapping table for the usual dup2 suspects.
 * Could use atomic ops to operate on dup2vec, but an application
 * racing there is not well-defined, so don't bother.
 *
 * Each dup2vec[] word holds a rump kernel fd in the low 30 bits
 * (DUP2FDMASK), plus DUP2BIT ("this host fd is mapped") and DUP2ALIAS.
 * The masks are unsigned: shifting a signed 1 into bit 31 is undefined
 * behaviour, and the words they are applied to are uint32_t anyway.
 */
/* note: you cannot change this without editing the env-passing code */
#define DUP2HIGH 2
static uint32_t dup2vec[DUP2HIGH+1];
#define DUP2BIT		(1U<<31)
#define DUP2ALIAS	(1U<<30)
#define DUP2FDMASK	((1U<<30)-1)
416
417 static bool
418 isdup2d(int fd)
419 {
420
421 return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT;
422 }
423
424 static int
425 mapdup2(int hostfd)
426 {
427
428 _DIAGASSERT(isdup2d(hostfd));
429 return dup2vec[hostfd] & DUP2FDMASK;
430 }
431
432 static int
433 unmapdup2(int rumpfd)
434 {
435 int i;
436
437 for (i = 0; i <= DUP2HIGH; i++) {
438 if (dup2vec[i] & DUP2BIT &&
439 (dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd)
440 return i;
441 }
442 return -1;
443 }
444
445 static void
446 setdup2(int hostfd, int rumpfd)
447 {
448
449 if (hostfd > DUP2HIGH) {
450 _DIAGASSERT(0);
451 return;
452 }
453
454 dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd;
455 }
456
457 static void
458 clrdup2(int hostfd)
459 {
460
461 if (hostfd > DUP2HIGH) {
462 _DIAGASSERT(0);
463 return;
464 }
465
466 dup2vec[hostfd] = 0;
467 }
468
469 static bool
470 killdup2alias(int rumpfd)
471 {
472 int hostfd;
473
474 if ((hostfd = unmapdup2(rumpfd)) == -1)
475 return false;
476
477 if (dup2vec[hostfd] & DUP2ALIAS) {
478 dup2vec[hostfd] &= ~DUP2ALIAS;
479 return true;
480 }
481 return false;
482 }
483
/*
 * Debug tracing.  Uncomment DEBUGJACK to have DPRINTF((fmt, ...)) print
 * to stderr; otherwise DPRINTF() expands to nothing.
 */
//#define DEBUGJACK
#ifdef DEBUGJACK
#define DPRINTF(x) mydprintf x

/* printf-style output to stderr, suppressed while fd 2 is dup2-mapped */
static void
mydprintf(const char *fmt, ...)
{
	va_list ap;

	if (!isdup2d(STDERR_FILENO)) {
		va_start(ap, fmt);
		vfprintf(stderr, fmt, ap);
		va_end(ap);
	}
}

/* describe which side a descriptor belongs to, for trace output */
static const char *
whichfd(int fd)
{

	if (fd == -1)
		return "-1";
	return fd_isrump(fd) ? "rump" : "host";
}

/* describe which side a path belongs to, for trace output */
static const char *
whichpath(const char *path)
{

	return path_isrump(path) ? "rump" : "host";
}

#else
#define DPRINTF(x)
#endif
525
/*
 * Generate a wrapper for a call that is dispatched on a descriptor
 * argument named "fd": rump kernel descriptors go to the rump
 * implementation (with fd translated), all others go to the host.
 */
#define FDCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto;						\
									\
	DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd)));	\
	if (!fd_isrump(fd)) {						\
		fun = syscalls[rcname].bs_host;				\
	} else {							\
		fun = syscalls[rcname].bs_rump;				\
		fd = fd_host2rump(fd);					\
	}								\
									\
	return fun vars;						\
}
541
/*
 * Generate a wrapper for a call that is dispatched on a pathname
 * argument named "path".  Anything path_isrump() does not classify as
 * PATH_HOST goes to the rump kernel; only PATH_RUMP paths are rewritten
 * with path_host2rump() first.
 */
#define PATHCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto;						\
	enum pathtype pt;						\
									\
	DPRINTF(("%s -> %s (%s)\n", __STRING(name), path,		\
	    whichpath(path)));						\
	pt = path_isrump(path);						\
	if (pt == PATH_HOST) {						\
		fun = syscalls[rcname].bs_host;				\
	} else {							\
		fun = syscalls[rcname].bs_rump;				\
		if (pt == PATH_RUMP)					\
			path = path_host2rump(path);			\
	}								\
									\
	return fun vars;						\
}
560
/*
 * Generate a wrapper for a call that is sent to the rump kernel when
 * "bit" is set in vfsbits and to the host otherwise.
 */
#define VFSCALL(bit, type, name, rcname, args, proto, vars)		\
type name args								\
{									\
	type (*fun) proto;						\
									\
	DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits));	\
	if ((vfsbits & bit) == 0) {					\
		fun = syscalls[rcname].bs_host;				\
	} else {							\
		fun = syscalls[rcname].bs_rump;				\
	}								\
									\
	return fun vars;						\
}
575
/*
 * These variables are set from the RUMPHIJACK string and control
 * which operations can produce rump kernel file descriptors.
 * This should be easily extendable for future needs.
 */
#define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal"
static bool rumpsockets[PF_MAX];	/* indexed by protocol family */
static const char *rumpprefix;		/* set by pathparser() */
static size_t rumpprefixlen;		/* strlen(rumpprefix) */

/* protocol family names accepted by sockparser(); NULL name ends it */
static struct {
	int pf;
	const char *name;
} socketmap[] = {
	{ PF_LOCAL,	"local" },
	{ PF_INET,	"inet" },
#ifdef PF_LINK
	{ PF_LINK,	"link" },
#endif
#ifdef PF_OROUTE
	{ PF_OROUTE,	"oroute" },
#endif
	{ PF_ROUTE,	"route" },
	{ PF_INET6,	"inet6" },
#ifdef PF_MPLS
	{ PF_MPLS,	"mpls" },
#endif
	{ -1,		NULL }
};
605
606 static void
607 sockparser(char *buf)
608 {
609 char *p, *l = NULL;
610 bool value;
611 int i;
612
613 /* if "all" is present, it must be specified first */
614 if (strncmp(buf, "all", strlen("all")) == 0) {
615 for (i = 0; i < (int)__arraycount(rumpsockets); i++) {
616 rumpsockets[i] = true;
617 }
618 buf += strlen("all");
619 if (*buf == ':')
620 buf++;
621 }
622
623 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) {
624 value = true;
625 if (strncmp(p, "no", strlen("no")) == 0) {
626 value = false;
627 p += strlen("no");
628 }
629
630 for (i = 0; socketmap[i].name; i++) {
631 if (strcmp(p, socketmap[i].name) == 0) {
632 rumpsockets[socketmap[i].pf] = value;
633 break;
634 }
635 }
636 if (socketmap[i].name == NULL) {
637 errx(1, "invalid socket specifier %s", p);
638 }
639 }
640 }
641
642 static void
643 pathparser(char *buf)
644 {
645
646 /* sanity-check */
647 if (*buf != '/')
648 errx(1, "hijack path specifier must begin with ``/''");
649 rumpprefixlen = strlen(buf);
650 if (rumpprefixlen < 2)
651 errx(1, "invalid hijack prefix: %s", buf);
652 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen)
653 errx(1, "hijack prefix may end in slash only if pure "
654 "slash, gave %s", buf);
655
656 if ((rumpprefix = strdup(buf)) == NULL)
657 err(1, "strdup");
658 rumpprefixlen = strlen(rumpprefix);
659 }
660
/*
 * Blanket prefixes.  path_isrump() classifies any absolute path that
 * starts with one of these as PATH_RUMPBLANKET.  nblanket is the number
 * of valid entries in blanket[].
 */
static struct blanket {
	const char *pfx;	/* private copy of the prefix */
	size_t len;		/* strlen(pfx) */
} *blanket;
static int nblanket;

/*
 * Parse the value of the "blanket" hijack specifier: a ':'-separated
 * list of absolute path prefixes, none of which may end in '/'.
 *
 * Empty list elements are skipped, and nblanket is set to the number of
 * prefixes actually stored, so it can never exceed the initialized part
 * of blanket[].  A list with no usable element is rejected.  The input
 * is only read, never past its terminating NUL.  Errors terminate the
 * process via err()/errx().
 */
static void
blanketparser(char *buf)
{
	struct blanket *tab;
	const char *p;
	char *copy;
	size_t len, maxent;
	int n;

	/* upper bound on the entry count: number of separators plus one */
	maxent = 1;
	for (p = buf; *p != '\0'; p++) {
		if (*p == ':')
			maxent++;
	}

	tab = malloc(maxent * sizeof(*tab));
	if (tab == NULL)
		err(1, "alloc blanket %zu", maxent);

	n = 0;
	p = buf;
	for (;;) {
		p += strspn(p, ":");		/* skip empty elements */
		len = strcspn(p, ":");
		if (len == 0)
			break;			/* end of list */

		if (*p != '/' || p[len-1] == '/')
			errx(1, "invalid blanket specifier %.*s", (int)len, p);

		copy = malloc(len + 1);
		if (copy == NULL)
			err(1, "alloc blanket prefix");
		memcpy(copy, p, len);
		copy[len] = '\0';

		tab[n].pfx = copy;
		tab[n].len = len;
		n++;
		p += len;
	}

	if (n == 0)
		errx(1, "invalid blanket specifier %s", buf);

	blanket = tab;
	nblanket = n;
}
693
/*
 * Bits in vfsbits, each selecting a group of calls for the rump kernel.
 * vfscalls[] gives the names vfsparser() accepts; NULL name ends it.
 */
#define VFSBIT_NFSSVC		0x01
#define VFSBIT_GETVFSSTAT	0x02
#define VFSBIT_FHCALLS		0x04
static unsigned vfsbits;

static struct {
	int bit;
	const char *name;
} vfscalls[] = {
	{ VFSBIT_NFSSVC,	"nfssvc" },
	{ VFSBIT_GETVFSSTAT,	"getvfsstat" },
	{ VFSBIT_FHCALLS,	"fhcalls" },
	{ -1,			NULL }
};
708
709 static void
710 vfsparser(char *buf)
711 {
712 char *p, *l = NULL;
713 bool turnon;
714 unsigned int fullmask;
715 int i;
716
717 /* build the full mask and sanity-check while we're at it */
718 fullmask = 0;
719 for (i = 0; vfscalls[i].name != NULL; i++) {
720 if (fullmask & vfscalls[i].bit)
721 errx(1, "problem exists between vi and chair");
722 fullmask |= vfscalls[i].bit;
723 }
724
725
726 /* if "all" is present, it must be specified first */
727 if (strncmp(buf, "all", strlen("all")) == 0) {
728 vfsbits = fullmask;
729 buf += strlen("all");
730 if (*buf == ':')
731 buf++;
732 }
733
734 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) {
735 turnon = true;
736 if (strncmp(p, "no", strlen("no")) == 0) {
737 turnon = false;
738 p += strlen("no");
739 }
740
741 for (i = 0; vfscalls[i].name; i++) {
742 if (strcmp(p, vfscalls[i].name) == 0) {
743 if (turnon)
744 vfsbits |= vfscalls[i].bit;
745 else
746 vfsbits &= ~vfscalls[i].bit;
747 break;
748 }
749 }
750 if (vfscalls[i].name == NULL) {
751 errx(1, "invalid vfscall specifier %s", p);
752 }
753 }
754 }
755
/* set by sysctlparser() from the "sysctl" hijack specifier */
static bool rumpsysctl = false;

/*
 * Parse the value of the "sysctl" hijack specifier.
 *
 * A missing value (buf == NULL) or one of y/yes/yep/tottakai sets
 * rumpsysctl; n/no clears it.  Matching ignores case.  Anything else
 * terminates the process via errx().
 */
static void
sysctlparser(char *buf)
{
	static const char *const yeswords[] = {
		"y", "yes", "yep", "tottakai", NULL
	};
	static const char *const nowords[] = { "n", "no", NULL };
	int i;

	if (buf == NULL) {
		rumpsysctl = true;
		return;
	}

	for (i = 0; yeswords[i] != NULL; i++) {
		if (strcasecmp(buf, yeswords[i]) == 0) {
			rumpsysctl = true;
			return;
		}
	}
	for (i = 0; nowords[i] != NULL; i++) {
		if (strcasecmp(buf, nowords[i]) == 0) {
			rumpsysctl = false;
			return;
		}
	}

	errx(1, "sysctl value should be y(es)/n(o), gave: %s", buf);
}
779
780 static void
781 fdoffparser(char *buf)
782 {
783 unsigned long fdoff;
784 char *ep;
785
786 if (*buf == '-') {
787 errx(1, "fdoff must not be negative");
788 }
789 fdoff = strtoul(buf, &ep, 10);
790 if (*ep != '\0')
791 errx(1, "invalid fdoff specifier \"%s\"", buf);
792 if (fdoff >= INT_MAX/2 || fdoff < 3)
793 errx(1, "fdoff out of range");
794 hijack_fdoff = fdoff;
795 }
796
797 static struct {
798 void (*parsefn)(char *);
799 const char *name;
800 bool needvalues;
801 } hijackparse[] = {
802 { sockparser, "socket", true },
803 { pathparser, "path", true },
804 { blanketparser, "blanket", true },
805 { vfsparser, "vfs", true },
806 { sysctlparser, "sysctl", false },
807 { fdoffparser, "fdoff", true },
808 { NULL, NULL, false },
809 };
810
811 static void
812 parsehijack(char *hijack)
813 {
814 char *p, *p2, *l;
815 const char *hijackcopy;
816 bool nop2;
817 int i;
818
819 if ((hijackcopy = strdup(hijack)) == NULL)
820 err(1, "strdup");
821
822 /* disable everything explicitly */
823 for (i = 0; i < PF_MAX; i++)
824 rumpsockets[i] = false;
825
826 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) {
827 nop2 = false;
828 p2 = strchr(p, '=');
829 if (!p2) {
830 nop2 = true;
831 p2 = p + strlen(p);
832 }
833
834 for (i = 0; hijackparse[i].parsefn; i++) {
835 if (strncmp(hijackparse[i].name, p,
836 (size_t)(p2-p)) == 0) {
837 if (nop2 && hijackparse[i].needvalues)
838 errx(1, "invalid hijack specifier: %s",
839 hijackcopy);
840 hijackparse[i].parsefn(nop2 ? NULL : p2+1);
841 break;
842 }
843 }
844
845 if (hijackparse[i].parsefn == NULL)
846 errx(1, "invalid hijack specifier name in %s", p);
847 }
848
849 }
850
851 static void __attribute__((constructor))
852 rcinit(void)
853 {
854 char buf[1024];
855 unsigned i, j;
856
857 host_fork = dlsym(RTLD_NEXT, "fork");
858 host_daemon = dlsym(RTLD_NEXT, "daemon");
859 host_mmap = dlsym(RTLD_NEXT, "mmap");
860
861 /*
862 * In theory cannot print anything during lookups because
863 * we might not have the call vector set up. so, the errx()
864 * is a bit of a strech, but it might work.
865 */
866
867 for (i = 0; i < DUALCALL__NUM; i++) {
868 /* build runtime O(1) access */
869 for (j = 0; j < __arraycount(syscnames); j++) {
870 if (syscnames[j].scm_callnum == i)
871 break;
872 }
873
874 if (j == __arraycount(syscnames))
875 errx(1, "rumphijack error: syscall pos %d missing", i);
876
877 syscalls[i].bs_host = dlsym(RTLD_NEXT,
878 syscnames[j].scm_hostname);
879 if (syscalls[i].bs_host == NULL)
880 errx(1, "hostcall %s not found!",
881 syscnames[j].scm_hostname);
882
883 syscalls[i].bs_rump = dlsym(RTLD_NEXT,
884 syscnames[j].scm_rumpname);
885 if (syscalls[i].bs_rump == NULL)
886 errx(1, "rumpcall %s not found!",
887 syscnames[j].scm_rumpname);
888 }
889
890 if (rumpclient_init() == -1)
891 err(1, "rumpclient init");
892
893 /* check which syscalls we're supposed to hijack */
894 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) {
895 strcpy(buf, RUMPHIJACK_DEFAULT);
896 }
897 parsehijack(buf);
898
899 /* set client persistence level */
900 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) {
901 if (strcmp(buf, "die") == 0)
902 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE);
903 else if (strcmp(buf, "inftime") == 0)
904 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
905 else if (strcmp(buf, "once") == 0)
906 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE);
907 else {
908 time_t timeout;
909 char *ep;
910
911 timeout = (time_t)strtoll(buf, &ep, 10);
912 if (timeout <= 0 || ep != buf + strlen(buf))
913 errx(1, "RUMPHIJACK_RETRYCONNECT must be "
914 "keyword or integer, got: %s", buf);
915
916 rumpclient_setconnretry(timeout);
917 }
918 }
919
920 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) {
921 if (sscanf(buf, "%u,%u,%u",
922 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) {
923 warnx("invalid dup2mask: %s", buf);
924 memset(dup2vec, 0, sizeof(dup2vec));
925 }
926 unsetenv("RUMPHIJACK__DUP2INFO");
927 }
928 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) {
929 pwdinrump = true;
930 unsetenv("RUMPHIJACK__PWDINRUMP");
931 }
932 }
933
934 static int
935 fd_rump2host(int fd)
936 {
937
938 if (fd == -1)
939 return fd;
940 return fd + hijack_fdoff;
941 }
942
943 static int
944 fd_rump2host_withdup(int fd)
945 {
946 int hfd;
947
948 _DIAGASSERT(fd != -1);
949 hfd = unmapdup2(fd);
950 if (hfd != -1) {
951 _DIAGASSERT(hfd <= DUP2HIGH);
952 return hfd;
953 }
954 return fd_rump2host(fd);
955 }
956
957 static int
958 fd_host2rump(int fd)
959 {
960
961 if (!isdup2d(fd))
962 return fd - hijack_fdoff;
963 else
964 return mapdup2(fd);
965 }
966
967 static bool
968 fd_isrump(int fd)
969 {
970
971 return isdup2d(fd) || fd >= hijack_fdoff;
972 }
973
/*
 * Assert that _fd_ is a rump kernel descriptor, i.e. the same condition
 * fd_isrump() tests.  Uses isdup2d(); no ISDUP2D macro exists.
 */
#define assertfd(_fd_) assert(isdup2d(_fd_) || (_fd_) >= hijack_fdoff)
975
976 static enum pathtype
977 path_isrump(const char *path)
978 {
979 size_t plen;
980 int i;
981
982 if (rumpprefix == NULL && nblanket == 0)
983 return PATH_HOST;
984
985 if (*path == '/') {
986 plen = strlen(path);
987 if (rumpprefix && plen >= rumpprefixlen) {
988 if (strncmp(path, rumpprefix, rumpprefixlen) == 0
989 && (plen == rumpprefixlen
990 || *(path + rumpprefixlen) == '/')) {
991 return PATH_RUMP;
992 }
993 }
994 for (i = 0; i < nblanket; i++) {
995 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0)
996 return PATH_RUMPBLANKET;
997 }
998
999 return PATH_HOST;
1000 } else {
1001 return pwdinrump ? PATH_RUMP : PATH_HOST;
1002 }
1003 }
1004
1005 static const char *rootpath = "/";
1006 static const char *
1007 path_host2rump(const char *path)
1008 {
1009 const char *rv;
1010
1011 if (*path == '/') {
1012 rv = path + rumpprefixlen;
1013 if (*rv == '\0')
1014 rv = rootpath;
1015 } else {
1016 rv = path;
1017 }
1018
1019 return rv;
1020 }
1021
1022 static int
1023 dodup(int oldd, int minfd)
1024 {
1025 int (*op_fcntl)(int, int, ...);
1026 int newd;
1027 int isrump;
1028
1029 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd));
1030 if (fd_isrump(oldd)) {
1031 op_fcntl = GETSYSCALL(rump, FCNTL);
1032 oldd = fd_host2rump(oldd);
1033 if (minfd >= hijack_fdoff)
1034 minfd -= hijack_fdoff;
1035 isrump = 1;
1036 } else {
1037 op_fcntl = GETSYSCALL(host, FCNTL);
1038 isrump = 0;
1039 }
1040
1041 newd = op_fcntl(oldd, F_DUPFD, minfd);
1042
1043 if (isrump)
1044 newd = fd_rump2host(newd);
1045 DPRINTF(("dup <- %d\n", newd));
1046
1047 return newd;
1048 }
1049
1050 /*
1051 * Check that host fd value does not exceed fdoffset and if necessary
1052 * dup the file descriptor so that it doesn't collide with the dup2mask.
1053 */
1054 static int
1055 fd_host2host(int fd)
1056 {
1057 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL);
1058 int (*op_close)(int) = GETSYSCALL(host, CLOSE);
1059 int ofd, i;
1060
1061 if (fd >= hijack_fdoff) {
1062 op_close(fd);
1063 errno = ENFILE;
1064 return -1;
1065 }
1066
1067 for (i = 1; isdup2d(fd); i++) {
1068 ofd = fd;
1069 fd = op_fcntl(ofd, F_DUPFD, i);
1070 op_close(ofd);
1071 }
1072
1073 return fd;
1074 }
1075
1076 int
1077 open(const char *path, int flags, ...)
1078 {
1079 int (*op_open)(const char *, int, ...);
1080 bool isrump;
1081 va_list ap;
1082 enum pathtype pt;
1083 int fd;
1084
1085 DPRINTF(("open -> %s (%s)\n", path, whichpath(path)));
1086
1087 if ((pt = path_isrump(path)) != PATH_HOST) {
1088 if (pt == PATH_RUMP)
1089 path = path_host2rump(path);
1090 op_open = GETSYSCALL(rump, OPEN);
1091 isrump = true;
1092 } else {
1093 op_open = GETSYSCALL(host, OPEN);
1094 isrump = false;
1095 }
1096
1097 va_start(ap, flags);
1098 fd = op_open(path, flags, va_arg(ap, mode_t));
1099 va_end(ap);
1100
1101 if (isrump)
1102 fd = fd_rump2host(fd);
1103 else
1104 fd = fd_host2host(fd);
1105
1106 DPRINTF(("open <- %d (%s)\n", fd, whichfd(fd)));
1107 return fd;
1108 }
1109
1110 int
1111 chdir(const char *path)
1112 {
1113 int (*op_chdir)(const char *);
1114 enum pathtype pt;
1115 int rv;
1116
1117 if ((pt = path_isrump(path)) != PATH_HOST) {
1118 op_chdir = GETSYSCALL(rump, CHDIR);
1119 if (pt == PATH_RUMP)
1120 path = path_host2rump(path);
1121 } else {
1122 op_chdir = GETSYSCALL(host, CHDIR);
1123 }
1124
1125 rv = op_chdir(path);
1126 if (rv == 0)
1127 pwdinrump = pt != PATH_HOST;
1128
1129 return rv;
1130 }
1131
1132 int
1133 fchdir(int fd)
1134 {
1135 int (*op_fchdir)(int);
1136 bool isrump;
1137 int rv;
1138
1139 if (fd_isrump(fd)) {
1140 op_fchdir = GETSYSCALL(rump, FCHDIR);
1141 isrump = true;
1142 fd = fd_host2rump(fd);
1143 } else {
1144 op_fchdir = GETSYSCALL(host, FCHDIR);
1145 isrump = false;
1146 }
1147
1148 rv = op_fchdir(fd);
1149 if (rv == 0) {
1150 pwdinrump = isrump;
1151 }
1152
1153 return rv;
1154 }
1155
#ifndef __linux__
/*
 * __getcwd wrapper.
 *
 * Outside the rump kernel the host call is used unchanged.  Inside it
 * without a path prefix (blanket only) the rump kernel result is
 * returned as is, since no prefix can be supplied.  Otherwise the rump
 * kernel writes its cwd after a gap at the start of bufp and rumpprefix
 * is copied into that gap, so the caller sees a host-style path.
 * Fails with ERANGE if the buffer cannot hold more than the gap.
 */
int
__getcwd(char *bufp, size_t len)
{
	int (*op___getcwd)(char *, size_t);
	size_t gap;
	bool slashpfx;
	int rv;

	if (!pwdinrump) {
		op___getcwd = GETSYSCALL(host, __GETCWD);
		return op___getcwd(bufp, len);
	}

	if (rumpprefix == NULL) {
		/* assume blanket.  we can't provide a prefix here */
		op___getcwd = GETSYSCALL(rump, __GETCWD);
		return op___getcwd(bufp, len);
	}

	/*
	 * A prefix ending in '/' overlaps the leading '/' of the rump
	 * kernel path (``//+path''); otherwise the path follows the
	 * prefix directly (``/pfx+/path'').
	 */
	slashpfx = rumpprefix[rumpprefixlen-1] == '/';
	gap = slashpfx ? rumpprefixlen - 1 : rumpprefixlen;
	if (len <= gap) {
		errno = ERANGE;
		return -1;
	}

	op___getcwd = GETSYSCALL(rump, __GETCWD);
	rv = op___getcwd(bufp + gap, len - gap);
	if (rv == -1)
		return rv;

	memcpy(bufp, rumpprefix, rumpprefixlen);

	if (rv != 2) {
		/* non-root cwd: keep the separator after the prefix */
		bufp[gap] = '/';
	} else if (!slashpfx) {
		/* root cwd: report the bare prefix, no trailing slash */
		bufp[rumpprefixlen] = '\0';
	}

	return rv;
}
#endif
1207
1208 static int
1209 moveish(const char *from, const char *to,
1210 int (*rump_op)(const char *, const char *),
1211 int (*host_op)(const char *, const char *))
1212 {
1213 int (*op)(const char *, const char *);
1214 enum pathtype ptf, ptt;
1215
1216 if ((ptf = path_isrump(from)) != PATH_HOST) {
1217 if ((ptt = path_isrump(to)) == PATH_HOST) {
1218 errno = EXDEV;
1219 return -1;
1220 }
1221
1222 if (ptf == PATH_RUMP)
1223 from = path_host2rump(from);
1224 if (ptt == PATH_RUMP)
1225 to = path_host2rump(to);
1226 op = rump_op;
1227 } else {
1228 if (path_isrump(to) != PATH_HOST) {
1229 errno = EXDEV;
1230 return -1;
1231 }
1232
1233 op = host_op;
1234 }
1235
1236 return op(from, to);
1237 }
1238
1239 int
1240 link(const char *from, const char *to)
1241 {
1242 return moveish(from, to,
1243 GETSYSCALL(rump, LINK), GETSYSCALL(host, LINK));
1244 }
1245
1246 int
1247 rename(const char *from, const char *to)
1248 {
1249 return moveish(from, to,
1250 GETSYSCALL(rump, RENAME), GETSYSCALL(host, RENAME));
1251 }
1252
1253 int
1254 REALSOCKET(int domain, int type, int protocol)
1255 {
1256 int (*op_socket)(int, int, int);
1257 int fd;
1258 bool isrump;
1259
1260 isrump = domain < PF_MAX && rumpsockets[domain];
1261
1262 if (isrump)
1263 op_socket = GETSYSCALL(rump, SOCKET);
1264 else
1265 op_socket = GETSYSCALL(host, SOCKET);
1266 fd = op_socket(domain, type, protocol);
1267
1268 if (isrump)
1269 fd = fd_rump2host(fd);
1270 else
1271 fd = fd_host2host(fd);
1272 DPRINTF(("socket <- %d\n", fd));
1273
1274 return fd;
1275 }
1276
1277 int
1278 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
1279 {
1280 int (*op_accept)(int, struct sockaddr *, socklen_t *);
1281 int fd;
1282 bool isrump;
1283
1284 isrump = fd_isrump(s);
1285
1286 DPRINTF(("accept -> %d", s));
1287 if (isrump) {
1288 op_accept = GETSYSCALL(rump, ACCEPT);
1289 s = fd_host2rump(s);
1290 } else {
1291 op_accept = GETSYSCALL(host, ACCEPT);
1292 }
1293 fd = op_accept(s, addr, addrlen);
1294 if (fd != -1 && isrump)
1295 fd = fd_rump2host(fd);
1296 else
1297 fd = fd_host2host(fd);
1298
1299 DPRINTF((" <- %d\n", fd));
1300
1301 return fd;
1302 }
1303
1304 /*
1305 * ioctl() and fcntl() are varargs calls and need special treatment.
1306 */
1307
1308 /*
1309 * Various [Linux] libc's have various signatures for ioctl so we
1310 * need to handle the discrepancies. On NetBSD, we use the
1311 * one with unsigned long cmd.
1312 */
1313 int
1314 #ifdef HAVE_IOCTL_CMD_INT
1315 ioctl(int fd, int cmd, ...)
1316 {
1317 int (*op_ioctl)(int, int cmd, ...);
1318 #else
1319 ioctl(int fd, unsigned long cmd, ...)
1320 {
1321 int (*op_ioctl)(int, unsigned long cmd, ...);
1322 #endif
1323 va_list ap;
1324 int rv;
1325
1326 DPRINTF(("ioctl -> %d\n", fd));
1327 if (fd_isrump(fd)) {
1328 fd = fd_host2rump(fd);
1329 op_ioctl = GETSYSCALL(rump, IOCTL);
1330 } else {
1331 op_ioctl = GETSYSCALL(host, IOCTL);
1332 }
1333
1334 va_start(ap, cmd);
1335 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
1336 va_end(ap);
1337 return rv;
1338 }
1339
1340 int
1341 fcntl(int fd, int cmd, ...)
1342 {
1343 int (*op_fcntl)(int, int, ...);
1344 va_list ap;
1345 int rv, minfd;
1346
1347 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd));
1348
1349 switch (cmd) {
1350 case F_DUPFD:
1351 va_start(ap, cmd);
1352 minfd = va_arg(ap, int);
1353 va_end(ap);
1354 return dodup(fd, minfd);
1355
1356 #ifdef F_CLOSEM
1357 case F_CLOSEM: {
1358 int maxdup2, i;
1359
1360 /*
1361 * So, if fd < HIJACKOFF, we want to do a host closem.
1362 */
1363
1364 if (fd < hijack_fdoff) {
1365 int closemfd = fd;
1366
1367 if (rumpclient__closenotify(&closemfd,
1368 RUMPCLIENT_CLOSE_FCLOSEM) == -1)
1369 return -1;
1370 op_fcntl = GETSYSCALL(host, FCNTL);
1371 rv = op_fcntl(closemfd, cmd);
1372 if (rv)
1373 return rv;
1374 }
1375
1376 /*
1377 * Additionally, we want to do a rump closem, but only
1378 * for the file descriptors not dup2'd.
1379 */
1380
1381 for (i = 0, maxdup2 = 0; i <= DUP2HIGH; i++) {
1382 if (dup2vec[i] & DUP2BIT) {
1383 int val;
1384
1385 val = dup2vec[i] & DUP2FDMASK;
1386 maxdup2 = MAX(val, maxdup2);
1387 }
1388 }
1389
1390 if (fd >= hijack_fdoff)
1391 fd -= hijack_fdoff;
1392 else
1393 fd = 0;
1394 fd = MAX(maxdup2+1, fd);
1395
1396 /* hmm, maybe we should close rump fd's not within dup2mask? */
1397 return rump_sys_fcntl(fd, F_CLOSEM);
1398 }
1399 #endif /* F_CLOSEM */
1400
1401 #ifdef F_MAXFD
1402 case F_MAXFD:
1403 /*
1404 * For maxfd, if there's a rump kernel fd, return
1405 * it hostified. Otherwise, return host's MAXFD
1406 * return value.
1407 */
1408 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) {
1409 /*
1410 * This might go a little wrong in case
1411 * of dup2 to [012], but I'm not sure if
1412 * there's a justification for tracking
1413 * that info. Consider e.g.
1414 * dup2(rumpfd, 2) followed by rump_sys_open()
1415 * returning 1. We should return 1+HIJACKOFF,
1416 * not 2+HIJACKOFF. However, if [01] is not
1417 * open, the correct return value is 2.
1418 */
1419 return fd_rump2host(fd);
1420 } else {
1421 op_fcntl = GETSYSCALL(host, FCNTL);
1422 return op_fcntl(fd, F_MAXFD);
1423 }
1424 /*NOTREACHED*/
1425 #endif /* F_MAXFD */
1426
1427 default:
1428 if (fd_isrump(fd)) {
1429 fd = fd_host2rump(fd);
1430 op_fcntl = GETSYSCALL(rump, FCNTL);
1431 } else {
1432 op_fcntl = GETSYSCALL(host, FCNTL);
1433 }
1434
1435 va_start(ap, cmd);
1436 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
1437 va_end(ap);
1438 return rv;
1439 }
1440 /*NOTREACHED*/
1441 }
1442
1443 int
1444 close(int fd)
1445 {
1446 int (*op_close)(int);
1447 int rv;
1448
1449 DPRINTF(("close -> %d\n", fd));
1450 if (fd_isrump(fd)) {
1451 bool undup2 = false;
1452 int ofd;
1453
1454 if (isdup2d(ofd = fd)) {
1455 undup2 = true;
1456 }
1457
1458 fd = fd_host2rump(fd);
1459 if (!undup2 && killdup2alias(fd)) {
1460 return 0;
1461 }
1462
1463 op_close = GETSYSCALL(rump, CLOSE);
1464 rv = op_close(fd);
1465 if (rv == 0 && undup2) {
1466 clrdup2(ofd);
1467 }
1468 } else {
1469 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1)
1470 return -1;
1471 op_close = GETSYSCALL(host, CLOSE);
1472 rv = op_close(fd);
1473 }
1474
1475 return rv;
1476 }
1477
1478 /*
1479 * write cannot issue a standard debug printf due to recursion
1480 */
1481 ssize_t
1482 write(int fd, const void *buf, size_t blen)
1483 {
1484 ssize_t (*op_write)(int, const void *, size_t);
1485
1486 if (fd_isrump(fd)) {
1487 fd = fd_host2rump(fd);
1488 op_write = GETSYSCALL(rump, WRITE);
1489 } else {
1490 op_write = GETSYSCALL(host, WRITE);
1491 }
1492
1493 return op_write(fd, buf, blen);
1494 }
1495
1496 /*
1497 * file descriptor passing
1498 *
1499 * we intercept sendmsg and recvmsg to convert file descriptors in
1500 * control messages. an attempt to send a descriptor from a different kernel
1501 * is rejected. (ENOTSUP)
1502 */
1503
/*
 * msg_convert: run func over every descriptor carried in SCM_RIGHTS
 * control messages of msg and store func's return value back in place
 * of the descriptor.
 *
 * Returns 0 on success.  If func returns a negative value for any
 * descriptor, conversion stops and ENOTSUP is returned (descriptors
 * converted before that point stay converted).
 *
 * A control message whose cmsg_len is smaller than the header itself
 * is malformed; the walk stops there instead of computing a bogus
 * payload size from it.
 */
static int
msg_convert(struct msghdr *msg, int (*func)(int))
{
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
	    cmsg = CMSG_NXTHDR(msg, cmsg)) {
		int *fdp;
		const int *efdp;
		size_t size;

		/* CMSG_LEN(0) is the aligned header size, portably */
		if (cmsg->cmsg_len < CMSG_LEN(0))
			break;

		if (cmsg->cmsg_level != SOL_SOCKET ||
		    cmsg->cmsg_type != SCM_RIGHTS)
			continue;

		size = cmsg->cmsg_len - CMSG_LEN(0);
		fdp = (void *)CMSG_DATA(cmsg);
		efdp = fdp + (int)(size / sizeof(int));

		while (fdp < efdp) {
			const int newval = func(*fdp);

			if (newval < 0) {
				return ENOTSUP;
			}
			*fdp = newval;
			fdp++;
		}
	}
	return 0;
}
1532
1533 ssize_t
1534 recvmsg(int fd, struct msghdr *msg, int flags)
1535 {
1536 ssize_t (*op_recvmsg)(int, struct msghdr *, int);
1537 ssize_t ret;
1538 const bool isrump = fd_isrump(fd);
1539
1540 if (isrump) {
1541 fd = fd_host2rump(fd);
1542 op_recvmsg = GETSYSCALL(rump, RECVMSG);
1543 } else {
1544 op_recvmsg = GETSYSCALL(host, RECVMSG);
1545 }
1546 ret = op_recvmsg(fd, msg, flags);
1547 if (ret == -1) {
1548 return ret;
1549 }
1550 /*
1551 * convert descriptors in the message.
1552 */
1553 if (isrump) {
1554 msg_convert(msg, fd_rump2host);
1555 } else {
1556 msg_convert(msg, fd_host2host);
1557 }
1558 return ret;
1559 }
1560
/* recv: recvfrom() without a source address */
ssize_t
recv(int fd, void *buf, size_t len, int flags)
{
	struct sockaddr * const nofrom = NULL;
	socklen_t * const nofromlen = NULL;

	return recvfrom(fd, buf, len, flags, nofrom, nofromlen);
}
1567
/* send: sendto() without a destination address */
ssize_t
send(int fd, const void *buf, size_t len, int flags)
{
	const struct sockaddr * const noto = NULL;

	return sendto(fd, buf, len, flags, noto, 0);
}
1574
/*
 * fd_check_rump: msg_convert() callback which accepts only rump kernel
 * descriptors.
 *
 * msg_convert() writes the callback's return value back into the
 * control message, so on success the descriptor itself must be
 * returned; returning a constant would overwrite every descriptor in
 * the message with that constant.  Returns -1 to reject.
 */
static int
fd_check_rump(int fd)
{

	return fd_isrump(fd) ? fd : -1;
}
1581
/*
 * fd_check_host: msg_convert() callback which accepts only host
 * descriptors.
 *
 * msg_convert() writes the callback's return value back into the
 * control message, so on success the descriptor itself must be
 * returned; returning a constant would overwrite every descriptor in
 * the message with that constant.  Returns -1 to reject.  (A negative
 * descriptor is consequently rejected here as well.)
 */
static int
fd_check_host(int fd)
{

	return !fd_isrump(fd) ? fd : -1;
}
1588
1589 ssize_t
1590 sendmsg(int fd, const struct msghdr *msg, int flags)
1591 {
1592 ssize_t (*op_sendmsg)(int, const struct msghdr *, int);
1593 const bool isrump = fd_isrump(fd);
1594 int error;
1595
1596 /*
1597 * reject descriptors from a different kernel.
1598 */
1599 error = msg_convert(__UNCONST(msg),
1600 isrump ? fd_check_rump: fd_check_host);
1601 if (error != 0) {
1602 errno = error;
1603 return -1;
1604 }
1605 /*
1606 * convert descriptors in the message to raw values.
1607 */
1608 if (isrump) {
1609 fd = fd_host2rump(fd);
1610 /*
1611 * XXX we directly modify the given message assuming:
1612 * - cmsg is writable (typically on caller's stack)
1613 * - caller don't care cmsg's contents after calling sendmsg.
1614 * (thus no need to restore values)
1615 *
1616 * it's safer to copy and modify instead.
1617 */
1618 msg_convert(__UNCONST(msg), fd_host2rump);
1619 op_sendmsg = GETSYSCALL(rump, SENDMSG);
1620 } else {
1621 op_sendmsg = GETSYSCALL(host, SENDMSG);
1622 }
1623 return op_sendmsg(fd, msg, flags);
1624 }
1625
1626 /*
1627 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
1628 * many programs do that. dup2 of a rump kernel fd to another value
1629 * not >= fdoff is an error.
1630 *
1631 * Note: cannot rump2host newd, because it is often hardcoded.
1632 */
1633 int
1634 dup2(int oldd, int newd)
1635 {
1636 int (*host_dup2)(int, int);
1637 int rv;
1638
1639 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
1640
1641 if (fd_isrump(oldd)) {
1642 int (*op_close)(int) = GETSYSCALL(host, CLOSE);
1643
1644 /* only allow fd 0-2 for cross-kernel dup */
1645 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) {
1646 errno = EBADF;
1647 return -1;
1648 }
1649
1650 /* regular dup2? */
1651 if (fd_isrump(newd)) {
1652 newd = fd_host2rump(newd);
1653 rv = rump_sys_dup2(oldd, newd);
1654 return fd_rump2host(rv);
1655 }
1656
1657 /*
1658 * dup2 rump => host? just establish an
1659 * entry in the mapping table.
1660 */
1661 op_close(newd);
1662 setdup2(newd, fd_host2rump(oldd));
1663 rv = 0;
1664 } else {
1665 host_dup2 = syscalls[DUALCALL_DUP2].bs_host;
1666 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1)
1667 return -1;
1668 rv = host_dup2(oldd, newd);
1669 }
1670
1671 return rv;
1672 }
1673
/* dup: duplicate via dodup() with no lower bound on the new descriptor */
int
dup(int oldd)
{
	const int minfd = 0;

	return dodup(oldd, minfd);
}
1680
1681 pid_t
1682 fork(void)
1683 {
1684 pid_t rv;
1685
1686 DPRINTF(("fork\n"));
1687
1688 rv = rumpclient__dofork(host_fork);
1689
1690 DPRINTF(("fork returns %d\n", rv));
1691 return rv;
1692 }
1693 #ifdef VFORK
1694 /* we do not have the luxury of not requiring a stackframe */
1695 __strong_alias(VFORK,fork);
1696 #endif
1697
/*
 * daemon: bracket the host's daemon() with rumpclient_prefork() and
 * rumpclient_fork_init().  Returns 0 on success and -1 if any of the
 * three steps fails.
 *
 * NOTE(review): when host_daemon() fails, rf is not handed back to
 * rumpclient -- confirm whether it needs to be released.
 */
int
daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf = rumpclient_prefork();

	if (rf == NULL)
		return -1;

	if (host_daemon(nochdir, noclose) == -1)
		return -1;

	return rumpclient_fork_init(rf) == -1 ? -1 : 0;
}
1714
1715 int
1716 execve(const char *path, char *const argv[], char *const envp[])
1717 {
1718 char buf[128];
1719 char *dup2str;
1720 const char *pwdinrumpstr;
1721 char **newenv;
1722 size_t nelem;
1723 int rv, sverrno;
1724 int bonus = 2, i = 0;
1725
1726 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u",
1727 dup2vec[0], dup2vec[1], dup2vec[2]);
1728 dup2str = strdup(buf);
1729 if (dup2str == NULL) {
1730 errno = ENOMEM;
1731 return -1;
1732 }
1733
1734 if (pwdinrump) {
1735 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true";
1736 bonus++;
1737 } else {
1738 pwdinrumpstr = NULL;
1739 }
1740
1741 for (nelem = 0; envp && envp[nelem]; nelem++)
1742 continue;
1743 newenv = malloc(sizeof(*newenv) * (nelem+bonus));
1744 if (newenv == NULL) {
1745 free(dup2str);
1746 errno = ENOMEM;
1747 return -1;
1748 }
1749 memcpy(newenv, envp, nelem*sizeof(*newenv));
1750 newenv[nelem+i] = dup2str;
1751 i++;
1752
1753 if (pwdinrumpstr) {
1754 newenv[nelem+i] = __UNCONST(pwdinrumpstr);
1755 i++;
1756 }
1757 newenv[nelem+i] = NULL;
1758 _DIAGASSERT(i < bonus);
1759
1760 rv = rumpclient_exec(path, argv, newenv);
1761
1762 _DIAGASSERT(rv != 0);
1763 sverrno = errno;
1764 free(newenv);
1765 free(dup2str);
1766 errno = sverrno;
1767 return rv;
1768 }
1769
1770 /*
1771 * select is done by calling poll.
1772 */
int
REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct pollfd *pfds = NULL;
	struct timespec ts, *tsp = NULL;
	nfds_t realnfds = 0;
	int i, j;
	int rv;

	DPRINTF(("select %d %p %p %p %p\n", nfds,
	    readfds, writefds, exceptfds, timeout));

	/*
	 * Well, first we must scan the fds to figure out how many
	 * fds there really are.  This is because up to and including
	 * nb5 poll() silently refuses nfds > process_maxopen_fds.
	 * Seems to be fixed in current, thank the maker.
	 * god damn cluster...bomb.
	 */
	for (i = 0; i < nfds; i++) {
		if ((readfds && FD_ISSET(i, readfds)) ||
		    (writefds && FD_ISSET(i, writefds)) ||
		    (exceptfds && FD_ISSET(i, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = calloc(realnfds, sizeof(*pfds));
		if (!pfds)
			return -1;
	}

	/* one pollfd per descriptor present in any of the sets */
	for (i = 0, j = 0; i < nfds; i++) {
		short events = 0;

		if (readfds && FD_ISSET(i, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(i, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(i, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[j].fd = i;
		pfds[j].events = events;
		j++;
	}
	assert(j == (int)realnfds);

	if (timeout) {
		TIMEVAL_TO_TIMESPEC(timeout, &ts);
		tsp = &ts;
	}
	rv = REALPOLLTS(pfds, realnfds, tsp, NULL);

	/*
	 * "If select() returns with an error the descriptor sets
	 * will be unmodified"
	 */
	if (rv >= 0) {
		/*
		 * zero out results (can't use FD_ZERO for the
		 * obvious select-me-not reason).  whee.
		 *
		 * We do this here since some software ignores the return
		 * value of select, and hence if the timeout expires, it may
		 * assume all input descriptors have activity.
		 */
		for (i = 0; i < nfds; i++) {
			if (readfds)
				FD_CLR(i, readfds);
			if (writefds)
				FD_CLR(i, writefds);
			if (exceptfds)
				FD_CLR(i, exceptfds);
		}
	}

	if (rv > 0) {
		/*
		 * We have >0 fds with activity.  Harvest the results.
		 */
		for (i = 0; i < (int)realnfds; i++) {
			const struct pollfd *p = &pfds[i];

			if (readfds && (p->revents & POLLIN))
				FD_SET(p->fd, readfds);
			if (writefds && (p->revents & POLLOUT))
				FD_SET(p->fd, writefds);
			if (exceptfds && (p->revents & (POLLHUP|POLLERR)))
				FD_SET(p->fd, exceptfds);
		}
	}

	free(pfds);
	return rv;
}
1895
/*
 * checkpoll: count how many entries of fds refer to the rump kernel
 * and how many to the host.  Entries with fd == -1 are skipped.  The
 * counters are incremented, not reset.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx;

	for (idx = 0; idx < nfds; idx++) {
		const int fd = fds[idx].fd;
		int *counter;

		if (fd == -1)
			continue;

		counter = fd_isrump(fd) ? rumpcall : hostcall;
		(*counter)++;
	}
}
1911
/*
 * adjustpoll: replace the fd of each of the nfds entries with
 * fdadj(fd), in array order.  Other members are left untouched.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	nfds_t idx = 0;

	while (idx < nfds) {
		struct pollfd *p = &fds[idx];

		p->fd = fdadj(p->fd);
		idx++;
	}
}
1921
/*
 * poll is easy as long as all the fds in the call belong to one
 * kernel.  Otherwise it's quite tricky...
 *
 * struct pollarg carries the arguments for hostpoll(), the helper
 * thread which polls the host descriptors.
 */
struct pollarg {
	/* arguments forwarded to the host's pollts */
	struct pollfd *pfds;
	nfds_t nfds;
	const struct timespec *ts;
	const sigset_t *sigmask;

	int pipefd;	/* rump fd written to once the host poll returns */
	int errnum;	/* errno of the host poll; set only if it failed */
};
1934
1935 static void *
1936 hostpoll(void *arg)
1937 {
1938 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
1939 const sigset_t *);
1940 struct pollarg *parg = arg;
1941 intptr_t rv;
1942
1943 op_pollts = GETSYSCALL(host, POLLTS);
1944 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
1945 if (rv == -1)
1946 parg->errnum = errno;
1947 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
1948
1949 return (void *)rv;
1950 }
1951
1952 int
1953 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
1954 const sigset_t *sigmask)
1955 {
1956 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
1957 const sigset_t *);
1958 int (*host_close)(int);
1959 int hostcall = 0, rumpcall = 0;
1960 pthread_t pt;
1961 nfds_t i;
1962 int rv;
1963
1964 DPRINTF(("poll %p %d %p %p\n", fds, (int)nfds, ts, sigmask));
1965 checkpoll(fds, nfds, &hostcall, &rumpcall);
1966
1967 if (hostcall && rumpcall) {
1968 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
1969 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
1970 struct pollarg parg;
1971 void *trv_val;
1972 int sverrno = 0, rv_rump, rv_host, errno_rump, errno_host;
1973
1974 /*
1975 * ok, this is where it gets tricky. We must support
1976 * this since it's a very common operation in certain
1977 * types of software (telnet, netcat, etc). We allocate
1978 * two vectors and run two poll commands in separate
1979 * threads. Whichever returns first "wins" and the
1980 * other kernel's fds won't show activity.
1981 */
1982 rv = -1;
1983
1984 /* allocate full vector for O(n) joining after call */
1985 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
1986 if (!pfd_host)
1987 goto out;
1988 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
1989 if (!pfd_rump) {
1990 goto out;
1991 }
1992
1993 /*
1994 * then, open two pipes, one for notifications
1995 * to each kernel.
1996 *
1997 * At least the rump pipe should probably be
1998 * cached, along with the helper threads. This
1999 * should give a microbenchmark improvement (haven't
2000 * experienced a macro-level problem yet, though).
2001 */
2002 if ((rv = rump_sys_pipe(rpipe)) == -1) {
2003 sverrno = errno;
2004 }
2005 if (rv == 0 && (rv = pipe(hpipe)) == -1) {
2006 sverrno = errno;
2007 }
2008
2009 /* split vectors (or signal errors) */
2010 for (i = 0; i < nfds; i++) {
2011 int fd;
2012
2013 fds[i].revents = 0;
2014 if (fds[i].fd == -1) {
2015 pfd_host[i].fd = -1;
2016 pfd_rump[i].fd = -1;
2017 } else if (fd_isrump(fds[i].fd)) {
2018 pfd_host[i].fd = -1;
2019 fd = fd_host2rump(fds[i].fd);
2020 if (fd == rpipe[0] || fd == rpipe[1]) {
2021 fds[i].revents = POLLNVAL;
2022 if (rv != -1)
2023 rv++;
2024 }
2025 pfd_rump[i].fd = fd;
2026 pfd_rump[i].events = fds[i].events;
2027 } else {
2028 pfd_rump[i].fd = -1;
2029 fd = fds[i].fd;
2030 if (fd == hpipe[0] || fd == hpipe[1]) {
2031 fds[i].revents = POLLNVAL;
2032 if (rv != -1)
2033 rv++;
2034 }
2035 pfd_host[i].fd = fd;
2036 pfd_host[i].events = fds[i].events;
2037 }
2038 pfd_rump[i].revents = pfd_host[i].revents = 0;
2039 }
2040 if (rv) {
2041 goto out;
2042 }
2043
2044 pfd_host[nfds].fd = hpipe[0];
2045 pfd_host[nfds].events = POLLIN;
2046 pfd_rump[nfds].fd = rpipe[0];
2047 pfd_rump[nfds].events = POLLIN;
2048
2049 /*
2050 * then, create a thread to do host part and meanwhile
2051 * do rump kernel part right here
2052 */
2053
2054 parg.pfds = pfd_host;
2055 parg.nfds = nfds+1;
2056 parg.ts = ts;
2057 parg.sigmask = sigmask;
2058 parg.pipefd = rpipe[1];
2059 pthread_create(&pt, NULL, hostpoll, &parg);
2060
2061 op_pollts = GETSYSCALL(rump, POLLTS);
2062 rv_rump = op_pollts(pfd_rump, nfds+1, ts, NULL);
2063 errno_rump = errno;
2064 write(hpipe[1], &rv, sizeof(rv));
2065 pthread_join(pt, &trv_val);
2066 rv_host = (int)(intptr_t)trv_val;
2067 errno_host = parg.errnum;
2068
2069 /* strip cross-thread notification from real results */
2070 if (pfd_host[nfds].revents & POLLIN) {
2071 assert((pfd_rump[nfds].revents & POLLIN) == 0);
2072 assert(rv_host > 0);
2073 rv_host--;
2074 }
2075 if (pfd_rump[nfds].revents & POLLIN) {
2076 assert((pfd_host[nfds].revents & POLLIN) == 0);
2077 assert(rv_rump > 0);
2078 rv_rump--;
2079 }
2080
2081 /* then merge the results into what's reported to the caller */
2082 if (rv_rump > 0 || rv_host > 0) {
2083 /* SUCCESS */
2084
2085 rv = 0;
2086 if (rv_rump > 0) {
2087 for (i = 0; i < nfds; i++) {
2088 if (pfd_rump[i].fd != -1)
2089 fds[i].revents
2090 = pfd_rump[i].revents;
2091 }
2092 rv += rv_rump;
2093 }
2094 if (rv_host > 0) {
2095 for (i = 0; i < nfds; i++) {
2096 if (pfd_host[i].fd != -1)
2097 fds[i].revents
2098 = pfd_host[i].revents;
2099 }
2100 rv += rv_host;
2101 }
2102 assert(rv > 0);
2103 sverrno = 0;
2104 } else if (rv_rump == -1 || rv_host == -1) {
2105 /* ERROR */
2106
2107 /* just pick one kernel at "random" */
2108 rv = -1;
2109 if (rv_host == -1) {
2110 sverrno = errno_host;
2111 } else if (rv_rump == -1) {
2112 sverrno = errno_rump;
2113 }
2114 } else {
2115 /* TIMEOUT */
2116
2117 rv = 0;
2118 assert(rv_rump == 0 && rv_host == 0);
2119 }
2120
2121 out:
2122 host_close = GETSYSCALL(host, CLOSE);
2123 if (rpipe[0] != -1)
2124 rump_sys_close(rpipe[0]);
2125 if (rpipe[1] != -1)
2126 rump_sys_close(rpipe[1]);
2127 if (hpipe[0] != -1)
2128 host_close(hpipe[0]);
2129 if (hpipe[1] != -1)
2130 host_close(hpipe[1]);
2131 free(pfd_host);
2132 free(pfd_rump);
2133 errno = sverrno;
2134 } else {
2135 if (hostcall) {
2136 op_pollts = GETSYSCALL(host, POLLTS);
2137 } else {
2138 op_pollts = GETSYSCALL(rump, POLLTS);
2139 adjustpoll(fds, nfds, fd_host2rump);
2140 }
2141
2142 rv = op_pollts(fds, nfds, ts, sigmask);
2143 if (rumpcall)
2144 adjustpoll(fds, nfds, fd_rump2host_withdup);
2145 }
2146
2147 return rv;
2148 }
2149
2150 int
2151 poll(struct pollfd *fds, nfds_t nfds, int timeout)
2152 {
2153 struct timespec ts;
2154 struct timespec *tsp = NULL;
2155
2156 if (timeout != INFTIM) {
2157 ts.tv_sec = timeout / 1000;
2158 ts.tv_nsec = (timeout % 1000) * 1000*1000;
2159
2160 tsp = &ts;
2161 }
2162
2163 return REALPOLLTS(fds, nfds, tsp, NULL);
2164 }
2165
#ifdef PLATFORM_HAS_KQUEUE
/*
 * kevent: always served by the host.  A changelist which refers to a
 * rump kernel descriptor in a read, write or vnode filter is refused
 * with ENOTSUP.
 */
int
REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{
	int (*op_kevent)(int, const struct kevent *, size_t,
		struct kevent *, size_t, const struct timespec *);
	size_t n;

	/*
	 * Check that we don't attempt to kevent rump kernel fd's.
	 * That needs similar treatment to select/poll, but is slightly
	 * trickier since we need to manage two different kq descriptors.
	 * (TODO, in case you're wondering).
	 */
	for (n = 0; n < nchanges; n++) {
		const struct kevent *ev = &changelist[n];

		/* only these filters are checked for a rump descriptor */
		switch (ev->filter) {
		case EVFILT_READ:
		case EVFILT_WRITE:
		case EVFILT_VNODE:
			if (fd_isrump((int)ev->ident)) {
				errno = ENOTSUP;
				return -1;
			}
			break;
		default:
			break;
		}
	}

	op_kevent = GETSYSCALL(host, KEVENT);
	return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout);
}
#endif /* PLATFORM_HAS_KQUEUE */
2198
/*
 * mmapping from a rump kernel is not supported, so disallow it:
 * a file-backed mapping of a rump kernel descriptor fails with ENOSYS.
 * Everything else goes to the host.
 *
 * "File-backed" is tested as "not anonymous".  Testing flags against
 * MAP_FILE does not work where MAP_FILE is defined as 0, since the
 * test can then never be true.
 */
void *
mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset)
{
#ifdef MAP_ANON
	const bool filemap = (flags & MAP_ANON) == 0;
#else
	const bool filemap = true;
#endif

	if (filemap && fd_isrump(fd)) {
		errno = ENOSYS;
		return MAP_FAILED;
	}
	return host_mmap(addr, len, prot, flags, fd, offset);
}
2212
#ifdef PLATFORM_HAS_NBSYSCTL
/*
 * these go to one or the other on a per-process configuration:
 * the rump kernel if rumpsysctl is set, the host otherwise.
 */
int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t);
int
__sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp,
	const void *new, size_t newlen)
{
	int (*op___sysctl)(const int *, unsigned int, void *, size_t *,
	    const void *, size_t);

	if (!rumpsysctl) {
		op___sysctl = GETSYSCALL(host, __SYSCTL);
		/* we haven't inited yet: look the host call up directly */
		if (__predict_false(op___sysctl == NULL))
			op___sysctl = rumphijack_dlsym(RTLD_NEXT, "__sysctl");
	} else {
		op___sysctl = GETSYSCALL(rump, __SYSCTL);
	}

	return op___sysctl(name, namelen, old, oldlenp, new, newlen);
}
#endif
2238
2239 /*
2240 * Rest are std type calls.
2241 */
2242
2243 FDCALL(int, bind, DUALCALL_BIND, \
2244 (int fd, const struct sockaddr *name, socklen_t namelen), \
2245 (int, const struct sockaddr *, socklen_t), \
2246 (fd, name, namelen))
2247
2248 FDCALL(int, connect, DUALCALL_CONNECT, \
2249 (int fd, const struct sockaddr *name, socklen_t namelen), \
2250 (int, const struct sockaddr *, socklen_t), \
2251 (fd, name, namelen))
2252
2253 FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \
2254 (int fd, struct sockaddr *name, socklen_t *namelen), \
2255 (int, struct sockaddr *, socklen_t *), \
2256 (fd, name, namelen))
2257
2258 FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \
2259 (int fd, struct sockaddr *name, socklen_t *namelen), \
2260 (int, struct sockaddr *, socklen_t *), \
2261 (fd, name, namelen))
2262
2263 FDCALL(int, listen, DUALCALL_LISTEN, \
2264 (int fd, int backlog), \
2265 (int, int), \
2266 (fd, backlog))
2267
2268 FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \
2269 (int fd, void *buf, size_t len, int flags, \
2270 struct sockaddr *from, socklen_t *fromlen), \
2271 (int, void *, size_t, int, struct sockaddr *, socklen_t *), \
2272 (fd, buf, len, flags, from, fromlen))
2273
2274 FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \
2275 (int fd, const void *buf, size_t len, int flags, \
2276 const struct sockaddr *to, socklen_t tolen), \
2277 (int, const void *, size_t, int, \
2278 const struct sockaddr *, socklen_t), \
2279 (fd, buf, len, flags, to, tolen))
2280
2281 FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \
2282 (int fd, int level, int optn, void *optval, socklen_t *optlen), \
2283 (int, int, int, void *, socklen_t *), \
2284 (fd, level, optn, optval, optlen))
2285
2286 FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \
2287 (int fd, int level, int optn, \
2288 const void *optval, socklen_t optlen), \
2289 (int, int, int, const void *, socklen_t), \
2290 (fd, level, optn, optval, optlen))
2291
2292 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \
2293 (int fd, int how), \
2294 (int, int), \
2295 (fd, how))
2296
2297 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \
2298 (int fd, void *buf, size_t buflen), \
2299 (int, void *, size_t), \
2300 (fd, buf, buflen))
2301
2302 #ifdef __linux__
2303 ssize_t __read_chk(int, void *, size_t)
2304 __attribute__((alias("read")));
2305 #endif
2306
2307 FDCALL(ssize_t, readv, DUALCALL_READV, \
2308 (int fd, const struct iovec *iov, int iovcnt), \
2309 (int, const struct iovec *, int), \
2310 (fd, iov, iovcnt))
2311
2312 FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD, \
2313 (int fd, void *buf, size_t nbytes, off_t offset), \
2314 (int, void *, size_t, off_t), \
2315 (fd, buf, nbytes, offset))
2316
2317 FDCALL(ssize_t, preadv, DUALCALL_PREADV, \
2318 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \
2319 (int, const struct iovec *, int, off_t), \
2320 (fd, iov, iovcnt, offset))
2321
2322 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \
2323 (int fd, const struct iovec *iov, int iovcnt), \
2324 (int, const struct iovec *, int), \
2325 (fd, iov, iovcnt))
2326
2327 FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE, \
2328 (int fd, const void *buf, size_t nbytes, off_t offset), \
2329 (int, const void *, size_t, off_t), \
2330 (fd, buf, nbytes, offset))
2331
2332 FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV, \
2333 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \
2334 (int, const struct iovec *, int, off_t), \
2335 (fd, iov, iovcnt, offset))
2336
2337 #ifndef __linux__
2338 FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \
2339 (int fd, struct stat *sb), \
2340 (int, struct stat *), \
2341 (fd, sb))
2342 #endif
2343
2344 #ifdef PLATFORM_HAS_NBVFSSTAT
2345 FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1, \
2346 (int fd, struct statvfs *buf, int flags), \
2347 (int, struct statvfs *, int), \
2348 (fd, buf, flags))
2349 #endif
2350
2351 FDCALL(off_t, lseek, DUALCALL_LSEEK, \
2352 (int fd, off_t offset, int whence), \
2353 (int, off_t, int), \
2354 (fd, offset, whence))
2355 #ifdef LSEEK_ALIAS
2356 __strong_alias(LSEEK_ALIAS,lseek);
2357 #endif
2358
2359 #ifndef __linux__
2360 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \
2361 (int fd, char *buf, size_t nbytes), \
2362 (int, char *, size_t), \
2363 (fd, buf, nbytes))
2364 #endif
2365
2366 FDCALL(int, fchown, DUALCALL_FCHOWN, \
2367 (int fd, uid_t owner, gid_t group), \
2368 (int, uid_t, gid_t), \
2369 (fd, owner, group))
2370
2371 FDCALL(int, fchmod, DUALCALL_FCHMOD, \
2372 (int fd, mode_t mode), \
2373 (int, mode_t), \
2374 (fd, mode))
2375
2376 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \
2377 (int fd, off_t length), \
2378 (int, off_t), \
2379 (fd, length))
2380
2381 FDCALL(int, fsync, DUALCALL_FSYNC, \
2382 (int fd), \
2383 (int), \
2384 (fd))
2385
2386 #ifdef PLATFORM_HAS_FSYNC_RANGE
2387 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \
2388 (int fd, int how, off_t start, off_t length), \
2389 (int, int, off_t, off_t), \
2390 (fd, how, start, length))
2391 #endif
2392
2393 FDCALL(int, futimes, DUALCALL_FUTIMES, \
2394 (int fd, const struct timeval *tv), \
2395 (int, const struct timeval *), \
2396 (fd, tv))
2397
2398 #ifdef PLATFORM_HAS_CHFLAGS
2399 FDCALL(int, fchflags, DUALCALL_FCHFLAGS, \
2400 (int fd, u_long flags), \
2401 (int, u_long), \
2402 (fd, flags))
2403 #endif
2404
2405 /*
2406 * path-based selectors
2407 */
2408
2409 #ifndef __linux__
2410 PATHCALL(int, REALSTAT, DUALCALL_STAT, \
2411 (const char *path, struct stat *sb), \
2412 (const char *, struct stat *), \
2413 (path, sb))
2414
2415 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \
2416 (const char *path, struct stat *sb), \
2417 (const char *, struct stat *), \
2418 (path, sb))
2419 #endif
2420
2421 PATHCALL(int, chown, DUALCALL_CHOWN, \
2422 (const char *path, uid_t owner, gid_t group), \
2423 (const char *, uid_t, gid_t), \
2424 (path, owner, group))
2425
2426 PATHCALL(int, lchown, DUALCALL_LCHOWN, \
2427 (const char *path, uid_t owner, gid_t group), \
2428 (const char *, uid_t, gid_t), \
2429 (path, owner, group))
2430
2431 PATHCALL(int, chmod, DUALCALL_CHMOD, \
2432 (const char *path, mode_t mode), \
2433 (const char *, mode_t), \
2434 (path, mode))
2435
2436 PATHCALL(int, lchmod, DUALCALL_LCHMOD, \
2437 (const char *path, mode_t mode), \
2438 (const char *, mode_t), \
2439 (path, mode))
2440
2441 #ifdef PLATFORM_HAS_NBVFSSTAT
2442 PATHCALL(int, statvfs1, DUALCALL_STATVFS1, \
2443 (const char *path, struct statvfs *buf, int flags), \
2444 (const char *, struct statvfs *, int), \
2445 (path, buf, flags))
2446 #endif
2447
2448 PATHCALL(int, unlink, DUALCALL_UNLINK, \
2449 (const char *path), \
2450 (const char *), \
2451 (path))
2452
2453 PATHCALL(int, symlink, DUALCALL_SYMLINK, \
2454 (const char *target, const char *path), \
2455 (const char *, const char *), \
2456 (target, path))
2457
2458 /*
2459 * readlink() can be called from malloc which can be called
2460 * from dlsym() during init
2461 */
2462 ssize_t
2463 readlink(const char *path, char *buf, size_t bufsiz)
2464 {
2465 int (*op_readlink)(const char *, char *, size_t);
2466 enum pathtype pt;
2467
2468 if ((pt = path_isrump(path)) != PATH_HOST) {
2469 op_readlink = GETSYSCALL(rump, READLINK);
2470 if (pt == PATH_RUMP)
2471 path = path_host2rump(path);
2472 } else {
2473 op_readlink = GETSYSCALL(host, READLINK);
2474 }
2475
2476 if (__predict_false(op_readlink == NULL)) {
2477 errno = ENOENT;
2478 return -1;
2479 }
2480
2481 return op_readlink(path, buf, bufsiz);
2482 }
2483
/*
 * mkdir(): instantiated through PATHCALL under the DUALCALL_MKDIR id.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, mkdir, DUALCALL_MKDIR, \
	(const char *path, mode_t mode), \
	(const char *, mode_t), \
	(path, mode))
2488
/*
 * rmdir(): instantiated through PATHCALL under the DUALCALL_RMDIR id.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, rmdir, DUALCALL_RMDIR, \
	(const char *path), \
	(const char *), \
	(path))
2493
/*
 * utimes(): instantiated through PATHCALL under the DUALCALL_UTIMES id.
 * This is the path-taking counterpart of the fd-based futimes() wrapper.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, utimes, DUALCALL_UTIMES, \
	(const char *path, const struct timeval *tv), \
	(const char *, const struct timeval *), \
	(path, tv))
2498
/*
 * lutimes(): instantiated through PATHCALL under the DUALCALL_LUTIMES
 * id; same parameter list as utimes().
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, lutimes, DUALCALL_LUTIMES, \
	(const char *path, const struct timeval *tv), \
	(const char *, const struct timeval *), \
	(path, tv))
2503
2504 #ifdef PLATFORM_HAS_CHFLAGS
/*
 * chflags(): instantiated through PATHCALL under the DUALCALL_CHFLAGS
 * id.  Only compiled when PLATFORM_HAS_CHFLAGS is defined.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, chflags, DUALCALL_CHFLAGS, \
	(const char *path, u_long flags), \
	(const char *, u_long), \
	(path, flags))
2509
/*
 * lchflags(): instantiated through PATHCALL under the DUALCALL_LCHFLAGS
 * id; same parameter list as chflags().  Only compiled when
 * PLATFORM_HAS_CHFLAGS is defined.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, lchflags, DUALCALL_LCHFLAGS, \
	(const char *path, u_long flags), \
	(const char *, u_long), \
	(path, flags))
2514 #endif /* PLATFORM_HAS_CHFLAGS */
2515
/*
 * truncate(): instantiated through PATHCALL under the DUALCALL_TRUNCATE
 * id; `length` is passed through as an off_t.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, truncate, DUALCALL_TRUNCATE, \
	(const char *path, off_t length), \
	(const char *, off_t), \
	(path, length))
2520
/*
 * access(): instantiated through PATHCALL under the DUALCALL_ACCESS id.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, access, DUALCALL_ACCESS, \
	(const char *path, int mode), \
	(const char *, int), \
	(path, mode))
2525
2526 #ifndef __linux__
/*
 * mknod() entry point, instantiated through PATHCALL under the
 * DUALCALL_MKNOD id.  Not compiled when __linux__ is defined.
 * NOTE(review): REALMKNOD and PATHCALL are macros defined outside this
 * chunk; REALMKNOD presumably names the platform's actual mknod symbol
 * and PATHCALL presumably selects on `path` -- confirm.
 */
PATHCALL(int, REALMKNOD, DUALCALL_MKNOD, \
	(const char *path, mode_t mode, dev_t dev), \
	(const char *, mode_t, dev_t), \
	(path, mode, dev))
2531 #endif
2532
2533 /*
2534 * Note: with mount the decisive parameter is the mount
2535 * destination directory. This is because we don't really know
2536 * about the "source" directory in a generic call (and besides,
2537 * it might not even exist, cf. nfs).
2538 */
2539 #ifdef PLATFORM_HAS_NBMOUNT
/*
 * mount() entry point, instantiated through PATHCALL under the
 * DUALCALL_MOUNT id.  The parameter named `path` is the second one,
 * after the file system `type`.  Only compiled when
 * PLATFORM_HAS_NBMOUNT is defined.
 * NOTE(review): REALMOUNT and PATHCALL are macros defined outside this
 * chunk; REALMOUNT presumably names the platform's actual mount symbol
 * and PATHCALL presumably selects on `path` -- confirm.
 */
PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \
	(const char *type, const char *path, int flags, \
	    void *data, size_t dlen), \
	(const char *, const char *, int, void *, size_t), \
	(type, path, flags, data, dlen))
2545
/*
 * unmount(): instantiated through PATHCALL under the DUALCALL_UNMOUNT
 * id.  Only compiled when PLATFORM_HAS_NBMOUNT is defined.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, unmount, DUALCALL_UNMOUNT, \
	(const char *path, int flags), \
	(const char *, int), \
	(path, flags))
2550 #endif /* PLATFORM_HAS_NBMOUNT */
2551
2552 #ifdef PLATFORM_HAS_NBQUOTA
2553 #if __NetBSD_Prereq__(5,99,63)
/*
 * __quotactl(): the variant built when __NetBSD_Prereq__(5,99,63)
 * holds; takes a struct quotactl_args.  Instantiated through PATHCALL
 * under the DUALCALL_QUOTACTL id.
 * NOTE(review): PATHCALL is defined outside this chunk; presumably it
 * selects the implementation from `path` -- confirm against the macro.
 */
PATHCALL(int, __quotactl, DUALCALL_QUOTACTL, \
	(const char *path, struct quotactl_args *args), \
	(const char *, struct quotactl_args *), \
	(path, args))
2558 #elif __NetBSD_Prereq__(5,99,48)
/*
 * Older quotactl entry point: built when __NetBSD_Prereq__(5,99,48)
 * holds but __NetBSD_Prereq__(5,99,63) does not; takes a struct
 * plistref.  It shares the DUALCALL_QUOTACTL id with the newer variant.
 * NOTE(review): OLDREALQUOTACTL and PATHCALL are macros defined outside
 * this chunk; PATHCALL presumably selects on `path` -- confirm.
 */
PATHCALL(int, OLDREALQUOTACTL, DUALCALL_QUOTACTL, \
	(const char *path, struct plistref *p), \
	(const char *, struct plistref *), \
	(path, p))
2563 #endif
2564 #endif /* PLATFORM_HAS_NBQUOTA */
2565
2566 #ifdef PLATFORM_HAS_NBFILEHANDLE
/*
 * getfh() entry point, instantiated through PATHCALL under the
 * DUALCALL_GETFH id.  Only compiled when PLATFORM_HAS_NBFILEHANDLE is
 * defined.
 * NOTE(review): REALGETFH and PATHCALL are macros defined outside this
 * chunk; REALGETFH presumably names the platform's actual getfh symbol
 * and PATHCALL presumably selects on `path` -- confirm.
 */
PATHCALL(int, REALGETFH, DUALCALL_GETFH, \
	(const char *path, void *fhp, size_t *fh_size), \
	(const char *, void *, size_t *), \
	(path, fhp, fh_size))
2571 #endif
2572
/*
 * These act differently depending on the per-process vfs configuration
 */
2576
2577 #ifdef PLATFORM_HAS_NBVFSSTAT
/*
 * getvfsstat(): instantiated through VFSCALL under the
 * DUALCALL_GETVFSSTAT id and associated with the VFSBIT_GETVFSSTAT bit.
 * Only compiled when PLATFORM_HAS_NBVFSSTAT is defined.
 * NOTE(review): VFSCALL is defined outside this chunk; presumably it
 * tests the given bit against the vfs configuration to pick the
 * implementation -- confirm against the macro.
 */
VFSCALL(VFSBIT_GETVFSSTAT, int, getvfsstat, DUALCALL_GETVFSSTAT, \
	(struct statvfs *buf, size_t buflen, int flags), \
	(struct statvfs *, size_t, int), \
	(buf, buflen, flags))
2582 #endif
2583
2584 #ifdef PLATFORM_HAS_NBFILEHANDLE
2585 VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN, \
2586 (const void *fhp, size_t fh_size, int flags), \
2587 (const char *, size_t, int), \
2588 (fhp, fh_size, flags))
2589
2590 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT, \
2591 (const void *fhp, size_t fh_size, struct stat *sb), \
2592 (const char *, size_t, struct stat *), \
2593 (fhp, fh_size, sb))
2594
2595 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1, \
2596 (const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\
2597 (const char *, size_t, struct statvfs *, int), \
2598 (fhp, fh_size, sb, flgs))
2599 #endif
2600
2601
2602 #ifdef PLATFORM_HAS_NFSSVC
2603
2604 /* finally, put nfssvc here. "keep the namespace clean" */
2605 #include <nfs/rpcv2.h>
2606 #include <nfs/nfs.h>
2607
2608 int
2609 nfssvc(int flags, void *argstructp)
2610 {
2611 int (*op_nfssvc)(int, void *);
2612
2613 if (vfsbits & VFSBIT_NFSSVC){
2614 struct nfsd_args *nfsdargs;
2615
2616 /* massage the socket descriptor if necessary */
2617 if (flags == NFSSVC_ADDSOCK) {
2618 nfsdargs = argstructp;
2619 nfsdargs->sock = fd_host2rump(nfsdargs->sock);
2620 }
2621 op_nfssvc = GETSYSCALL(rump, NFSSVC);
2622 } else
2623 op_nfssvc = GETSYSCALL(host, NFSSVC);
2624
2625 return op_nfssvc(flags, argstructp);
2626 }
2627 #endif /* PLATFORM_HAS_NFSSVC */
2628