hijack.c revision 1.86 1 /* $NetBSD: hijack.c,v 1.86 2011/03/14 15:15:47 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.86 2011/03/14 15:15:47 pooka Exp $");
30
31 #define __ssp_weak_name(fun) _hijack_ ## fun
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/event.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/mount.h>
39 #include <sys/poll.h>
40 #include <sys/socket.h>
41 #include <sys/statvfs.h>
42
43 #include <rump/rumpclient.h>
44 #include <rump/rump_syscalls.h>
45
46 #include <assert.h>
47 #include <dlfcn.h>
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <poll.h>
52 #include <pthread.h>
53 #include <signal.h>
54 #include <stdarg.h>
55 #include <stdbool.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <time.h>
60 #include <unistd.h>
61
62 #include "hijack.h"
63
/*
 * One identifier per call that has both a host and a rump kernel
 * implementation.  The values index the syscalls[] table, so
 * DUALCALL__NUM must remain the final enumerator.
 */
enum dualcall {
	/* writing */
	DUALCALL_WRITE,
	DUALCALL_WRITEV,
	DUALCALL_PWRITE,
	DUALCALL_PWRITEV,
	/* descriptor control */
	DUALCALL_IOCTL,
	DUALCALL_FCNTL,
	/* sockets */
	DUALCALL_SOCKET,
	DUALCALL_ACCEPT,
	DUALCALL_BIND,
	DUALCALL_CONNECT,
	DUALCALL_GETPEERNAME,
	DUALCALL_GETSOCKNAME,
	DUALCALL_LISTEN,
	DUALCALL_RECVFROM,
	DUALCALL_RECVMSG,
	DUALCALL_SENDTO,
	DUALCALL_SENDMSG,
	DUALCALL_GETSOCKOPT,
	DUALCALL_SETSOCKOPT,
	DUALCALL_SHUTDOWN,
	/* reading */
	DUALCALL_READ,
	DUALCALL_READV,
	DUALCALL_PREAD,
	DUALCALL_PREADV,
	/* descriptor lifetime and event waiting */
	DUALCALL_DUP2,
	DUALCALL_CLOSE,
	DUALCALL_POLLTS,
	DUALCALL_KEVENT,
	/* file attributes */
	DUALCALL_STAT,
	DUALCALL_LSTAT,
	DUALCALL_FSTAT,
	DUALCALL_CHMOD,
	DUALCALL_LCHMOD,
	DUALCALL_FCHMOD,
	DUALCALL_CHOWN,
	DUALCALL_LCHOWN,
	DUALCALL_FCHOWN,
	DUALCALL_OPEN,
	DUALCALL_STATVFS1,
	DUALCALL_FSTATVFS1,
	/* directories and names */
	DUALCALL_CHDIR,
	DUALCALL_FCHDIR,
	DUALCALL_LSEEK,
	DUALCALL_GETDENTS,
	DUALCALL_UNLINK,
	DUALCALL_SYMLINK,
	DUALCALL_READLINK,
	DUALCALL_RENAME,
	DUALCALL_MKDIR,
	DUALCALL_RMDIR,
	/* timestamps, size, sync */
	DUALCALL_UTIMES,
	DUALCALL_LUTIMES,
	DUALCALL_FUTIMES,
	DUALCALL_TRUNCATE,
	DUALCALL_FTRUNCATE,
	DUALCALL_FSYNC,
	DUALCALL_FSYNC_RANGE,
	/* mounting and miscellaneous */
	DUALCALL_MOUNT,
	DUALCALL_UNMOUNT,
	DUALCALL___GETCWD,
	DUALCALL_CHFLAGS,
	DUALCALL_LCHFLAGS,
	DUALCALL_FCHFLAGS,
	DUALCALL_ACCESS,
	DUALCALL_MKNOD,
	DUALCALL___SYSCTL,
	DUALCALL_GETVFSSTAT,
	DUALCALL_NFSSVC,
	/* file handle calls */
	DUALCALL_GETFH,
	DUALCALL_FHOPEN,
	DUALCALL_FHSTAT,
	DUALCALL_FHSTATVFS1,
#if __NetBSD_Prereq__(5,99,48)
	DUALCALL_QUOTACTL,
#endif
	DUALCALL__NUM
};
105
106 #define RSYS_STRING(a) __STRING(a)
107 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
108
109 /*
110 * Would be nice to get this automatically in sync with libc.
111 * Also, this does not work for compat-using binaries!
112 */
113 #if !__NetBSD_Prereq__(5,99,7)
114 #define REALSELECT select
115 #define REALPOLLTS pollts
116 #define REALKEVENT kevent
117 #define REALSTAT __stat30
118 #define REALLSTAT __lstat30
119 #define REALFSTAT __fstat30
120 #define REALUTIMES utimes
121 #define REALLUTIMES lutimes
122 #define REALFUTIMES futimes
123 #define REALMKNOD mknod
124 #define REALFHSTAT __fhstat40
125 #else
126 #define REALSELECT _sys___select50
127 #define REALPOLLTS _sys___pollts50
128 #define REALKEVENT _sys___kevent50
129 #define REALSTAT __stat50
130 #define REALLSTAT __lstat50
131 #define REALFSTAT __fstat50
132 #define REALUTIMES __utimes50
133 #define REALLUTIMES __lutimes50
134 #define REALFUTIMES __futimes50
135 #define REALMKNOD __mknod50
136 #define REALFHSTAT __fhstat50
137 #endif
138 #define REALREAD _sys_read
139 #define REALPREAD _sys_pread
140 #define REALPWRITE _sys_pwrite
141 #define REALGETDENTS __getdents30
142 #define REALMOUNT __mount50
143 #define REALGETFH __getfh30
144 #define REALFHOPEN __fhopen40
145 #define REALFHSTATVFS1 __fhstatvfs140
146 #define REALQUOTACTL __quotactl50
147
148 int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
149 int REALPOLLTS(struct pollfd *, nfds_t,
150 const struct timespec *, const sigset_t *);
151 int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t,
152 const struct timespec *);
153 ssize_t REALREAD(int, void *, size_t);
154 ssize_t REALPREAD(int, void *, size_t, off_t);
155 ssize_t REALPWRITE(int, const void *, size_t, off_t);
156 int REALSTAT(const char *, struct stat *);
157 int REALLSTAT(const char *, struct stat *);
158 int REALFSTAT(int, struct stat *);
159 int REALGETDENTS(int, char *, size_t);
160 int REALUTIMES(const char *, const struct timeval [2]);
161 int REALLUTIMES(const char *, const struct timeval [2]);
162 int REALFUTIMES(int, const struct timeval [2]);
163 int REALMOUNT(const char *, const char *, int, void *, size_t);
164 int __getcwd(char *, size_t);
165 int REALMKNOD(const char *, mode_t, dev_t);
166 int REALGETFH(const char *, void *, size_t *);
167 int REALFHOPEN(const void *, size_t, int);
168 int REALFHSTAT(const void *, size_t, struct stat *);
169 int REALFHSTATVFS1(const void *, size_t, struct statvfs *, int);
170 int REALQUOTACTL(const char *, struct plistref *);
171
172 #define S(a) __STRING(a)
173 struct sysnames {
174 enum dualcall scm_callnum;
175 const char *scm_hostname;
176 const char *scm_rumpname;
177 } syscnames[] = {
178 { DUALCALL_SOCKET, "__socket30", RSYS_NAME(SOCKET) },
179 { DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) },
180 { DUALCALL_BIND, "bind", RSYS_NAME(BIND) },
181 { DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) },
182 { DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) },
183 { DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) },
184 { DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) },
185 { DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) },
186 { DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) },
187 { DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) },
188 { DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) },
189 { DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) },
190 { DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) },
191 { DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) },
192 { DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) },
193 { DUALCALL_READV, "readv", RSYS_NAME(READV) },
194 { DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) },
195 { DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) },
196 { DUALCALL_WRITE, "write", RSYS_NAME(WRITE) },
197 { DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) },
198 { DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) },
199 { DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) },
200 { DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) },
201 { DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) },
202 { DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) },
203 { DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) },
204 { DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) },
205 { DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) },
206 { DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) },
207 { DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) },
208 { DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) },
209 { DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) },
210 { DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) },
211 { DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) },
212 { DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) },
213 { DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) },
214 { DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) },
215 { DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) },
216 { DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) },
217 { DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) },
218 { DUALCALL_OPEN, "open", RSYS_NAME(OPEN) },
219 { DUALCALL_STATVFS1, "statvfs1", RSYS_NAME(STATVFS1) },
220 { DUALCALL_FSTATVFS1, "fstatvfs1", RSYS_NAME(FSTATVFS1) },
221 { DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) },
222 { DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) },
223 { DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) },
224 { DUALCALL_GETDENTS, "__getdents30", RSYS_NAME(GETDENTS) },
225 { DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) },
226 { DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) },
227 { DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) },
228 { DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) },
229 { DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) },
230 { DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) },
231 { DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) },
232 { DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) },
233 { DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) },
234 { DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) },
235 { DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) },
236 { DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) },
237 { DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) },
238 { DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) },
239 { DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) },
240 { DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) },
241 { DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) },
242 { DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) },
243 { DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) },
244 { DUALCALL_GETVFSSTAT, "getvfsstat", RSYS_NAME(GETVFSSTAT) },
245 { DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) },
246 { DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) },
247 { DUALCALL_FHOPEN, S(REALFHOPEN),RSYS_NAME(FHOPEN) },
248 { DUALCALL_FHSTAT, S(REALFHSTAT),RSYS_NAME(FHSTAT) },
249 { DUALCALL_FHSTATVFS1, S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) },
250 #if __NetBSD_Prereq__(5,99,48)
251 { DUALCALL_QUOTACTL, S(REALQUOTACTL),RSYS_NAME(QUOTACTL) },
252 #endif
253 };
254 #undef S
255
/*
 * Pair of entry points for one dualcall: the host implementation and
 * the rump kernel implementation.  Both are filled in by rcinit().
 */
struct bothsys {
	void *bs_host;
	void *bs_rump;
};

/* indexed by enum dualcall */
struct bothsys syscalls[DUALCALL__NUM];

/* GETSYSCALL(host, OPEN) or GETSYSCALL(rump, OPEN) */
#define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which
261
262 static pid_t (*host_fork)(void);
263 static int (*host_daemon)(int, int);
264 static void * (*host_mmap)(void *, size_t, int, int, int, off_t);
265
266 /*
267 * This tracks if our process is in a subdirectory of /rump.
268 * It's preserved over exec.
269 */
270 static bool pwdinrump;
271
272 enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET };
273
274 static bool fd_isrump(int);
275 static enum pathtype path_isrump(const char *);
276
277 /* default FD_SETSIZE is 256 ==> default fdoff is 128 */
278 static int hijack_fdoff = FD_SETSIZE/2;
279
280 /*
281 * Maintain a mapping table for the usual dup2 suspects.
282 * Could use atomic ops to operate on dup2vec, but an application
283 * racing there is not well-defined, so don't bother.
284 */
285 /* note: you cannot change this without editing the env-passing code */
#define DUP2HIGH 2
/* one entry per host descriptor 0..DUP2HIGH */
static uint32_t dup2vec[DUP2HIGH+1];
/*
 * Layout of a dup2vec entry.  The constants are unsigned because
 * shifting 1 into bit 31 of a signed int is undefined behaviour.
 */
#define DUP2BIT (1U<<31)		/* entry is in use */
#define DUP2ALIAS (1U<<30)		/* cleared by killdup2alias() */
#define DUP2FDMASK ((1U<<30)-1)		/* rump kernel descriptor number */
291
292 static bool
293 isdup2d(int fd)
294 {
295
296 return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT;
297 }
298
299 static int
300 mapdup2(int hostfd)
301 {
302
303 _DIAGASSERT(isdup2d(hostfd));
304 return dup2vec[hostfd] & DUP2FDMASK;
305 }
306
307 static int
308 unmapdup2(int rumpfd)
309 {
310 int i;
311
312 for (i = 0; i <= DUP2HIGH; i++) {
313 if (dup2vec[i] & DUP2BIT &&
314 (dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd)
315 return i;
316 }
317 return -1;
318 }
319
320 static void
321 setdup2(int hostfd, int rumpfd)
322 {
323
324 if (hostfd > DUP2HIGH) {
325 _DIAGASSERT(0);
326 return;
327 }
328
329 dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd;
330 }
331
332 static void
333 clrdup2(int hostfd)
334 {
335
336 if (hostfd > DUP2HIGH) {
337 _DIAGASSERT(0);
338 return;
339 }
340
341 dup2vec[hostfd] = 0;
342 }
343
344 static bool
345 killdup2alias(int rumpfd)
346 {
347 int hostfd;
348
349 if ((hostfd = unmapdup2(rumpfd)) == -1)
350 return false;
351
352 if (dup2vec[hostfd] & DUP2ALIAS) {
353 dup2vec[hostfd] &= ~DUP2ALIAS;
354 return true;
355 }
356 return false;
357 }
358
//#define DEBUGJACK
#ifdef DEBUGJACK
#define DPRINTF(x) mydprintf x

/*
 * printf-style debug output to stderr.  Nothing is printed while
 * STDERR_FILENO is dup2'd.
 */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	if (!isdup2d(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

/* describe for debug output where a descriptor belongs */
static const char *
whichfd(int fd)
{

	if (fd == -1)
		return "-1";

	return fd_isrump(fd) ? "rump" : "host";
}

/* describe for debug output where a path belongs */
static const char *
whichpath(const char *path)
{

	return path_isrump(path) ? "rump" : "host";
}

#else
/* debugging disabled: the argument list is discarded unevaluated */
#define DPRINTF(x)
#endif
400
/*
 * Generate a wrapper for a call whose routing depends on a descriptor
 * argument named "fd": rump descriptors are translated and passed to
 * the rump kernel implementation, everything else goes to the host.
 */
#define FDCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto = syscalls[rcname].bs_host;			\
									\
	DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd)));	\
	if (fd_isrump(fd)) {						\
		fd = fd_host2rump(fd);					\
		fun = syscalls[rcname].bs_rump;				\
	}								\
									\
	return fun vars;						\
}
416
/*
 * Generate a wrapper for a call whose routing depends on a pathname
 * argument named "path".  Anything but PATH_HOST goes to the rump
 * kernel; only PATH_RUMP paths have the prefix stripped first.
 */
#define PATHCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto = syscalls[rcname].bs_host;			\
	enum pathtype pt;						\
									\
	DPRINTF(("%s -> %s (%s)\n", __STRING(name), path,		\
	    whichpath(path)));						\
	pt = path_isrump(path);						\
	if (pt != PATH_HOST)						\
		fun = syscalls[rcname].bs_rump;				\
	if (pt == PATH_RUMP)						\
		path = path_host2rump(path);				\
									\
	return fun vars;						\
}
435
/*
 * Generate a wrapper for a call that is routed to the rump kernel
 * when "bit" is set in vfsbits and to the host otherwise.
 */
#define VFSCALL(bit, type, name, rcname, args, proto, vars)		\
type name args								\
{									\
	type (*fun) proto;						\
									\
	DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits));	\
	fun = (vfsbits & bit)						\
	    ? syscalls[rcname].bs_rump					\
	    : syscalls[rcname].bs_host;					\
									\
	return fun vars;						\
}
450
/*
 * These variables are set from the RUMPHIJACK string and control
 * which operations can produce rump kernel file descriptors.
 * This should be easily extendable for future needs.
 */
456 #define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal"
457 static bool rumpsockets[PF_MAX];
458 static const char *rumpprefix;
459 static size_t rumpprefixlen;
460
461 static struct {
462 int pf;
463 const char *name;
464 } socketmap[] = {
465 { PF_LOCAL, "local" },
466 { PF_INET, "inet" },
467 { PF_LINK, "link" },
468 #ifdef PF_OROUTE
469 { PF_OROUTE, "oroute" },
470 #endif
471 { PF_ROUTE, "route" },
472 { PF_INET6, "inet6" },
473 #ifdef PF_MPLS
474 { PF_MPLS, "mpls" },
475 #endif
476 { -1, NULL }
477 };
478
479 static void
480 sockparser(char *buf)
481 {
482 char *p, *l;
483 bool value;
484 int i;
485
486 /* if "all" is present, it must be specified first */
487 if (strncmp(buf, "all", strlen("all")) == 0) {
488 for (i = 0; i < (int)__arraycount(rumpsockets); i++) {
489 rumpsockets[i] = true;
490 }
491 buf += strlen("all");
492 if (*buf == ':')
493 buf++;
494 }
495
496 for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) {
497 value = true;
498 if (strncmp(p, "no", strlen("no")) == 0) {
499 value = false;
500 p += strlen("no");
501 }
502
503 for (i = 0; socketmap[i].name; i++) {
504 if (strcmp(p, socketmap[i].name) == 0) {
505 rumpsockets[socketmap[i].pf] = value;
506 break;
507 }
508 }
509 if (socketmap[i].name == NULL) {
510 errx(1, "invalid socket specifier %s", p);
511 }
512 }
513 }
514
515 static void
516 pathparser(char *buf)
517 {
518
519 /* sanity-check */
520 if (*buf != '/')
521 errx(1, "hijack path specifier must begin with ``/''");
522 rumpprefixlen = strlen(buf);
523 if (rumpprefixlen < 2)
524 errx(1, "invalid hijack prefix: %s", buf);
525 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen)
526 errx(1, "hijack prefix may end in slash only if pure "
527 "slash, gave %s", buf);
528
529 if ((rumpprefix = strdup(buf)) == NULL)
530 err(1, "strdup");
531 rumpprefixlen = strlen(rumpprefix);
532 }
533
/* blanket prefixes; nblanket is the number of valid entries */
static struct blanket {
	const char *pfx;
	size_t len;
} *blanket;
static int nblanket;

/*
 * Parse the value of the "blanket" option: a ':'-separated list of
 * path prefixes.  Every prefix must begin with '/' and must not end
 * in '/'; violations are fatal.  The buffer is modified by strtok_r().
 *
 * strtok_r() silently skips empty fields ("a::b", "a:", ""), so the
 * number of ':' characters is only an upper bound for the number of
 * prefixes.  nblanket is therefore set to the number of entries that
 * were actually filled in, which keeps path_isrump() from looking at
 * uninitialized table slots.
 */
static void
blanketparser(char *buf)
{
	struct blanket *tab;
	char *p, *l;
	size_t len;
	int maxent, i;

	/* upper bound: one more than the number of separators */
	maxent = 1;
	for (p = buf; (p = strchr(p, ':')) != NULL; p++)
		maxent++;

	tab = malloc(maxent * sizeof(*tab));
	if (tab == NULL)
		err(1, "alloc blanket %d", maxent);

	for (p = strtok_r(buf, ":", &l), i = 0; p;
	    p = strtok_r(NULL, ":", &l), i++) {
		len = strlen(p);
		if (len == 0 || *p != '/')
			errx(1, "invalid blanket specifier %s", p);
		if (p[len - 1] == '/')
			errx(1, "invalid blanket specifier %s", p);

		tab[i].pfx = strdup(p);
		if (tab[i].pfx == NULL)
			err(1, "strdup blanket");
		tab[i].len = len;
	}

	/* publish only what was really parsed */
	blanket = tab;
	nblanket = i;
}
566
#define VFSBIT_NFSSVC		0x01
#define VFSBIT_GETVFSSTAT	0x02
#define VFSBIT_FHCALLS		0x04

/* set of VFSBIT_* flags currently enabled */
static unsigned vfsbits;

/* names accepted by vfsparser(); terminated by a NULL name */
static struct {
	int bit;
	const char *name;
} vfscalls[] = {
	{ VFSBIT_NFSSVC,	"nfssvc" },
	{ VFSBIT_GETVFSSTAT,	"getvfsstat" },
	{ VFSBIT_FHCALLS,	"fhcalls" },
	{ -1,			NULL }
};

/*
 * Parse the value of the "vfs" option: a ':'-separated list of names
 * from vfscalls[], each optionally prefixed with "no" to clear the
 * bit instead of setting it.  A leading "all" sets every bit first.
 * Unknown names are fatal.  The buffer is modified by strtok_r().
 */
static void
vfsparser(char *buf)
{
	char *tok, *state;
	unsigned int everything;
	bool negate;
	int k;

	/* build the full mask and sanity-check while we're at it */
	everything = 0;
	for (k = 0; vfscalls[k].name != NULL; k++) {
		if ((everything & vfscalls[k].bit) != 0)
			errx(1, "problem exists between vi and chair");
		everything |= vfscalls[k].bit;
	}

	/* if "all" is present, it must be specified first */
	if (strncmp(buf, "all", strlen("all")) == 0) {
		vfsbits = everything;
		buf += strlen("all");
		if (*buf == ':')
			buf++;
	}

	tok = strtok_r(buf, ":", &state);
	while (tok != NULL) {
		negate = strncmp(tok, "no", strlen("no")) == 0;
		if (negate)
			tok += strlen("no");

		for (k = 0; vfscalls[k].name != NULL; k++) {
			if (strcmp(tok, vfscalls[k].name) == 0)
				break;
		}
		if (vfscalls[k].name == NULL) {
			errx(1, "invalid vfscall specifier %s", tok);
		}

		if (negate)
			vfsbits &= ~vfscalls[k].bit;
		else
			vfsbits |= vfscalls[k].bit;

		tok = strtok_r(NULL, ":", &state);
	}
}
628
static bool rumpsysctl = false;

/*
 * Parse the value of the "sysctl" option and set rumpsysctl.
 * A missing value (NULL) means "yes".  Recognized values are matched
 * case-insensitively; anything else is fatal.
 */
static void
sysctlparser(char *buf)
{
	static const char *const yeswords[] = { "y", "yes", "yep", "tottakai" };
	static const char *const nowords[] = { "n", "no" };
	size_t k;

	if (buf == NULL) {
		rumpsysctl = true;
		return;
	}

	for (k = 0; k < sizeof(yeswords) / sizeof(yeswords[0]); k++) {
		if (strcasecmp(buf, yeswords[k]) == 0) {
			rumpsysctl = true;
			return;
		}
	}
	for (k = 0; k < sizeof(nowords) / sizeof(nowords[0]); k++) {
		if (strcasecmp(buf, nowords[k]) == 0) {
			rumpsysctl = false;
			return;
		}
	}

	errx(1, "sysctl value should be y(es)/n(o), gave: %s", buf);
}
652
653 static void
654 fdoffparser(char *buf)
655 {
656 unsigned long fdoff;
657 char *ep;
658
659 if (*buf == '-') {
660 errx(1, "fdoff must not be negative");
661 }
662 fdoff = strtoul(buf, &ep, 10);
663 if (*ep != '\0')
664 errx(1, "invalid fdoff specifier \"%s\"", buf);
665 if (fdoff >= INT_MAX/2 || fdoff < 3)
666 errx(1, "fdoff out of range");
667 hijack_fdoff = fdoff;
668 }
669
670 static struct {
671 void (*parsefn)(char *);
672 const char *name;
673 bool needvalues;
674 } hijackparse[] = {
675 { sockparser, "socket", true },
676 { pathparser, "path", true },
677 { blanketparser, "blanket", true },
678 { vfsparser, "vfs", true },
679 { sysctlparser, "sysctl", false },
680 { fdoffparser, "fdoff", true },
681 { NULL, NULL, false },
682 };
683
684 static void
685 parsehijack(char *hijack)
686 {
687 char *p, *p2, *l;
688 const char *hijackcopy;
689 bool nop2;
690 int i;
691
692 if ((hijackcopy = strdup(hijack)) == NULL)
693 err(1, "strdup");
694
695 /* disable everything explicitly */
696 for (i = 0; i < PF_MAX; i++)
697 rumpsockets[i] = false;
698
699 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) {
700 nop2 = false;
701 p2 = strchr(p, '=');
702 if (!p2) {
703 nop2 = true;
704 p2 = p + strlen(p);
705 }
706
707 for (i = 0; hijackparse[i].parsefn; i++) {
708 if (strncmp(hijackparse[i].name, p,
709 (size_t)(p2-p)) == 0) {
710 if (nop2 && hijackparse[i].needvalues)
711 errx(1, "invalid hijack specifier: %s",
712 hijackcopy);
713 hijackparse[i].parsefn(nop2 ? NULL : p2+1);
714 break;
715 }
716 }
717
718 if (hijackparse[i].parsefn == NULL)
719 errx(1, "invalid hijack specifier name in %s", p);
720 }
721
722 }
723
724 static void __attribute__((constructor))
725 rcinit(void)
726 {
727 char buf[1024];
728 unsigned i, j;
729
730 host_fork = dlsym(RTLD_NEXT, "fork");
731 host_daemon = dlsym(RTLD_NEXT, "daemon");
732 host_mmap = dlsym(RTLD_NEXT, "mmap");
733
734 /*
735 * In theory cannot print anything during lookups because
736 * we might not have the call vector set up. so, the errx()
737 * is a bit of a strech, but it might work.
738 */
739
740 for (i = 0; i < DUALCALL__NUM; i++) {
741 /* build runtime O(1) access */
742 for (j = 0; j < __arraycount(syscnames); j++) {
743 if (syscnames[j].scm_callnum == i)
744 break;
745 }
746
747 if (j == __arraycount(syscnames))
748 errx(1, "rumphijack error: syscall pos %d missing", i);
749
750 syscalls[i].bs_host = dlsym(RTLD_NEXT,
751 syscnames[j].scm_hostname);
752 if (syscalls[i].bs_host == NULL)
753 errx(1, "hostcall %s not found!",
754 syscnames[j].scm_hostname);
755
756 syscalls[i].bs_rump = dlsym(RTLD_NEXT,
757 syscnames[j].scm_rumpname);
758 if (syscalls[i].bs_rump == NULL)
759 errx(1, "rumpcall %s not found!",
760 syscnames[j].scm_rumpname);
761 }
762
763 if (rumpclient_init() == -1)
764 err(1, "rumpclient init");
765
766 /* check which syscalls we're supposed to hijack */
767 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) {
768 strcpy(buf, RUMPHIJACK_DEFAULT);
769 }
770 parsehijack(buf);
771
772 /* set client persistence level */
773 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) {
774 if (strcmp(buf, "die") == 0)
775 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE);
776 else if (strcmp(buf, "inftime") == 0)
777 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
778 else if (strcmp(buf, "once") == 0)
779 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE);
780 else {
781 time_t timeout;
782 char *ep;
783
784 timeout = (time_t)strtoll(buf, &ep, 10);
785 if (timeout <= 0 || ep != buf + strlen(buf))
786 errx(1, "RUMPHIJACK_RETRYCONNECT must be "
787 "keyword or integer, got: %s", buf);
788
789 rumpclient_setconnretry(timeout);
790 }
791 }
792
793 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) {
794 if (sscanf(buf, "%u,%u,%u",
795 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) {
796 warnx("invalid dup2mask: %s", buf);
797 memset(dup2vec, 0, sizeof(dup2vec));
798 }
799 unsetenv("RUMPHIJACK__DUP2INFO");
800 }
801 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) {
802 pwdinrump = true;
803 unsetenv("RUMPHIJACK__PWDINRUMP");
804 }
805 }
806
807 static int
808 fd_rump2host(int fd)
809 {
810
811 if (fd == -1)
812 return fd;
813 return fd + hijack_fdoff;
814 }
815
816 static int
817 fd_rump2host_withdup(int fd)
818 {
819 int hfd;
820
821 _DIAGASSERT(fd != -1);
822 hfd = unmapdup2(fd);
823 if (hfd != -1) {
824 _DIAGASSERT(hfd <= DUP2HIGH);
825 return hfd;
826 }
827 return fd_rump2host(fd);
828 }
829
830 static int
831 fd_host2rump(int fd)
832 {
833
834 if (!isdup2d(fd))
835 return fd - hijack_fdoff;
836 else
837 return mapdup2(fd);
838 }
839
840 static bool
841 fd_isrump(int fd)
842 {
843
844 return isdup2d(fd) || fd >= hijack_fdoff;
845 }
846
/*
 * Assert that _fd_ is a rump kernel descriptor; this is the same test
 * as fd_isrump().  The helper is the function isdup2d(); no ISDUP2D
 * macro is defined in this part of the file.
 */
#define assertfd(_fd_) assert(isdup2d(_fd_) || (_fd_) >= hijack_fdoff)
848
849 static enum pathtype
850 path_isrump(const char *path)
851 {
852 size_t plen;
853 int i;
854
855 if (rumpprefix == NULL && nblanket == 0)
856 return PATH_HOST;
857
858 if (*path == '/') {
859 plen = strlen(path);
860 if (rumpprefix && plen >= rumpprefixlen) {
861 if (strncmp(path, rumpprefix, rumpprefixlen) == 0
862 && (plen == rumpprefixlen
863 || *(path + rumpprefixlen) == '/')) {
864 return PATH_RUMP;
865 }
866 }
867 for (i = 0; i < nblanket; i++) {
868 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0)
869 return PATH_RUMPBLANKET;
870 }
871
872 return PATH_HOST;
873 } else {
874 return pwdinrump ? PATH_RUMP : PATH_HOST;
875 }
876 }
877
878 static const char *rootpath = "/";
879 static const char *
880 path_host2rump(const char *path)
881 {
882 const char *rv;
883
884 if (*path == '/') {
885 rv = path + rumpprefixlen;
886 if (*rv == '\0')
887 rv = rootpath;
888 } else {
889 rv = path;
890 }
891
892 return rv;
893 }
894
895 static int
896 dodup(int oldd, int minfd)
897 {
898 int (*op_fcntl)(int, int, ...);
899 int newd;
900 int isrump;
901
902 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd));
903 if (fd_isrump(oldd)) {
904 op_fcntl = GETSYSCALL(rump, FCNTL);
905 oldd = fd_host2rump(oldd);
906 if (minfd >= hijack_fdoff)
907 minfd -= hijack_fdoff;
908 isrump = 1;
909 } else {
910 op_fcntl = GETSYSCALL(host, FCNTL);
911 isrump = 0;
912 }
913
914 newd = op_fcntl(oldd, F_DUPFD, minfd);
915
916 if (isrump)
917 newd = fd_rump2host(newd);
918 DPRINTF(("dup <- %d\n", newd));
919
920 return newd;
921 }
922
923 /*
924 * Check that host fd value does not exceed fdoffset and if necessary
925 * dup the file descriptor so that it doesn't collide with the dup2mask.
926 */
927 static int
928 fd_host2host(int fd)
929 {
930 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL);
931 int (*op_close)(int) = GETSYSCALL(host, CLOSE);
932 int ofd, i;
933
934 if (fd >= hijack_fdoff) {
935 op_close(fd);
936 errno = ENFILE;
937 return -1;
938 }
939
940 for (i = 1; isdup2d(fd); i++) {
941 ofd = fd;
942 fd = op_fcntl(ofd, F_DUPFD, i);
943 op_close(ofd);
944 }
945
946 return fd;
947 }
948
949 int
950 open(const char *path, int flags, ...)
951 {
952 int (*op_open)(const char *, int, ...);
953 bool isrump;
954 va_list ap;
955 enum pathtype pt;
956 int fd;
957
958 DPRINTF(("open -> %s (%s)\n", path, whichpath(path)));
959
960 if ((pt = path_isrump(path)) != PATH_HOST) {
961 if (pt == PATH_RUMP)
962 path = path_host2rump(path);
963 op_open = GETSYSCALL(rump, OPEN);
964 isrump = true;
965 } else {
966 op_open = GETSYSCALL(host, OPEN);
967 isrump = false;
968 }
969
970 va_start(ap, flags);
971 fd = op_open(path, flags, va_arg(ap, mode_t));
972 va_end(ap);
973
974 if (isrump)
975 fd = fd_rump2host(fd);
976 else
977 fd = fd_host2host(fd);
978
979 DPRINTF(("open <- %d (%s)\n", fd, whichfd(fd)));
980 return fd;
981 }
982
983 int
984 chdir(const char *path)
985 {
986 int (*op_chdir)(const char *);
987 enum pathtype pt;
988 int rv;
989
990 if ((pt = path_isrump(path)) != PATH_HOST) {
991 op_chdir = GETSYSCALL(rump, CHDIR);
992 if (pt == PATH_RUMP)
993 path = path_host2rump(path);
994 } else {
995 op_chdir = GETSYSCALL(host, CHDIR);
996 }
997
998 rv = op_chdir(path);
999 if (rv == 0)
1000 pwdinrump = pt != PATH_HOST;
1001
1002 return rv;
1003 }
1004
1005 int
1006 fchdir(int fd)
1007 {
1008 int (*op_fchdir)(int);
1009 bool isrump;
1010 int rv;
1011
1012 if (fd_isrump(fd)) {
1013 op_fchdir = GETSYSCALL(rump, FCHDIR);
1014 isrump = true;
1015 fd = fd_host2rump(fd);
1016 } else {
1017 op_fchdir = GETSYSCALL(host, FCHDIR);
1018 isrump = false;
1019 }
1020
1021 rv = op_fchdir(fd);
1022 if (rv == 0) {
1023 pwdinrump = isrump;
1024 }
1025
1026 return rv;
1027 }
1028
1029 int
1030 __getcwd(char *bufp, size_t len)
1031 {
1032 int (*op___getcwd)(char *, size_t);
1033 size_t prefixgap;
1034 bool iamslash;
1035 int rv;
1036
1037 if (pwdinrump && rumpprefix) {
1038 if (rumpprefix[rumpprefixlen-1] == '/')
1039 iamslash = true;
1040 else
1041 iamslash = false;
1042
1043 if (iamslash)
1044 prefixgap = rumpprefixlen - 1; /* ``//+path'' */
1045 else
1046 prefixgap = rumpprefixlen; /* ``/pfx+/path'' */
1047 if (len <= prefixgap) {
1048 errno = ERANGE;
1049 return -1;
1050 }
1051
1052 op___getcwd = GETSYSCALL(rump, __GETCWD);
1053 rv = op___getcwd(bufp + prefixgap, len - prefixgap);
1054 if (rv == -1)
1055 return rv;
1056
1057 /* augment the "/" part only for a non-root path */
1058 memcpy(bufp, rumpprefix, rumpprefixlen);
1059
1060 /* append / only to non-root cwd */
1061 if (rv != 2)
1062 bufp[prefixgap] = '/';
1063
1064 /* don't append extra slash in the purely-slash case */
1065 if (rv == 2 && !iamslash)
1066 bufp[rumpprefixlen] = '\0';
1067 } else if (pwdinrump) {
1068 /* assume blanket. we can't provide a prefix here */
1069 op___getcwd = GETSYSCALL(rump, __GETCWD);
1070 rv = op___getcwd(bufp, len);
1071 } else {
1072 op___getcwd = GETSYSCALL(host, __GETCWD);
1073 rv = op___getcwd(bufp, len);
1074 }
1075
1076 return rv;
1077 }
1078
1079 int
1080 rename(const char *from, const char *to)
1081 {
1082 int (*op_rename)(const char *, const char *);
1083 enum pathtype ptf, ptt;
1084
1085 if ((ptf = path_isrump(from)) != PATH_HOST) {
1086 if ((ptt = path_isrump(to)) == PATH_HOST) {
1087 errno = EXDEV;
1088 return -1;
1089 }
1090
1091 if (ptf == PATH_RUMP)
1092 from = path_host2rump(from);
1093 if (ptt == PATH_RUMP)
1094 to = path_host2rump(to);
1095 op_rename = GETSYSCALL(rump, RENAME);
1096 } else {
1097 if (path_isrump(to) != PATH_HOST) {
1098 errno = EXDEV;
1099 return -1;
1100 }
1101
1102 op_rename = GETSYSCALL(host, RENAME);
1103 }
1104
1105 return op_rename(from, to);
1106 }
1107
1108 int __socket30(int, int, int);
1109 int
1110 __socket30(int domain, int type, int protocol)
1111 {
1112 int (*op_socket)(int, int, int);
1113 int fd;
1114 bool isrump;
1115
1116 isrump = domain < PF_MAX && rumpsockets[domain];
1117
1118 if (isrump)
1119 op_socket = GETSYSCALL(rump, SOCKET);
1120 else
1121 op_socket = GETSYSCALL(host, SOCKET);
1122 fd = op_socket(domain, type, protocol);
1123
1124 if (isrump)
1125 fd = fd_rump2host(fd);
1126 else
1127 fd = fd_host2host(fd);
1128 DPRINTF(("socket <- %d\n", fd));
1129
1130 return fd;
1131 }
1132
1133 int
1134 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
1135 {
1136 int (*op_accept)(int, struct sockaddr *, socklen_t *);
1137 int fd;
1138 bool isrump;
1139
1140 isrump = fd_isrump(s);
1141
1142 DPRINTF(("accept -> %d", s));
1143 if (isrump) {
1144 op_accept = GETSYSCALL(rump, ACCEPT);
1145 s = fd_host2rump(s);
1146 } else {
1147 op_accept = GETSYSCALL(host, ACCEPT);
1148 }
1149 fd = op_accept(s, addr, addrlen);
1150 if (fd != -1 && isrump)
1151 fd = fd_rump2host(fd);
1152 else
1153 fd = fd_host2host(fd);
1154
1155 DPRINTF((" <- %d\n", fd));
1156
1157 return fd;
1158 }
1159
1160 /*
1161 * ioctl and fcntl are varargs calls and need special treatment
1162 */
1163 int
1164 ioctl(int fd, unsigned long cmd, ...)
1165 {
1166 int (*op_ioctl)(int, unsigned long cmd, ...);
1167 va_list ap;
1168 int rv;
1169
1170 DPRINTF(("ioctl -> %d\n", fd));
1171 if (fd_isrump(fd)) {
1172 fd = fd_host2rump(fd);
1173 op_ioctl = GETSYSCALL(rump, IOCTL);
1174 } else {
1175 op_ioctl = GETSYSCALL(host, IOCTL);
1176 }
1177
1178 va_start(ap, cmd);
1179 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
1180 va_end(ap);
1181 return rv;
1182 }
1183
1184 int
1185 fcntl(int fd, int cmd, ...)
1186 {
1187 int (*op_fcntl)(int, int, ...);
1188 va_list ap;
1189 int rv, minfd, i, maxdup2;
1190
1191 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd));
1192
1193 switch (cmd) {
1194 case F_DUPFD:
1195 va_start(ap, cmd);
1196 minfd = va_arg(ap, int);
1197 va_end(ap);
1198 return dodup(fd, minfd);
1199
1200 case F_CLOSEM:
1201 /*
1202 * So, if fd < HIJACKOFF, we want to do a host closem.
1203 */
1204
1205 if (fd < hijack_fdoff) {
1206 int closemfd = fd;
1207
1208 if (rumpclient__closenotify(&closemfd,
1209 RUMPCLIENT_CLOSE_FCLOSEM) == -1)
1210 return -1;
1211 op_fcntl = GETSYSCALL(host, FCNTL);
1212 rv = op_fcntl(closemfd, cmd);
1213 if (rv)
1214 return rv;
1215 }
1216
1217 /*
1218 * Additionally, we want to do a rump closem, but only
1219 * for the file descriptors not dup2'd.
1220 */
1221
1222 for (i = 0, maxdup2 = 0; i <= DUP2HIGH; i++) {
1223 if (dup2vec[i] & DUP2BIT) {
1224 int val;
1225
1226 val = dup2vec[i] & DUP2FDMASK;
1227 maxdup2 = MAX(val, maxdup2);
1228 }
1229 }
1230
1231 if (fd >= hijack_fdoff)
1232 fd -= hijack_fdoff;
1233 else
1234 fd = 0;
1235 fd = MAX(maxdup2+1, fd);
1236
1237 /* hmm, maybe we should close rump fd's not within dup2mask? */
1238 return rump_sys_fcntl(fd, F_CLOSEM);
1239
1240 case F_MAXFD:
1241 /*
1242 * For maxfd, if there's a rump kernel fd, return
1243 * it hostified. Otherwise, return host's MAXFD
1244 * return value.
1245 */
1246 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) {
1247 /*
1248 * This might go a little wrong in case
1249 * of dup2 to [012], but I'm not sure if
1250 * there's a justification for tracking
1251 * that info. Consider e.g.
1252 * dup2(rumpfd, 2) followed by rump_sys_open()
1253 * returning 1. We should return 1+HIJACKOFF,
1254 * not 2+HIJACKOFF. However, if [01] is not
1255 * open, the correct return value is 2.
1256 */
1257 return fd_rump2host(fd);
1258 } else {
1259 op_fcntl = GETSYSCALL(host, FCNTL);
1260 return op_fcntl(fd, F_MAXFD);
1261 }
1262 /*NOTREACHED*/
1263
1264 default:
1265 if (fd_isrump(fd)) {
1266 fd = fd_host2rump(fd);
1267 op_fcntl = GETSYSCALL(rump, FCNTL);
1268 } else {
1269 op_fcntl = GETSYSCALL(host, FCNTL);
1270 }
1271
1272 va_start(ap, cmd);
1273 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
1274 va_end(ap);
1275 return rv;
1276 }
1277 /*NOTREACHED*/
1278 }
1279
1280 int
1281 close(int fd)
1282 {
1283 int (*op_close)(int);
1284 int rv;
1285
1286 DPRINTF(("close -> %d\n", fd));
1287 if (fd_isrump(fd)) {
1288 bool undup2 = false;
1289 int ofd;
1290
1291 if (isdup2d(ofd = fd)) {
1292 undup2 = true;
1293 }
1294
1295 fd = fd_host2rump(fd);
1296 if (!undup2 && killdup2alias(fd)) {
1297 return 0;
1298 }
1299
1300 op_close = GETSYSCALL(rump, CLOSE);
1301 rv = op_close(fd);
1302 if (rv == 0 && undup2) {
1303 clrdup2(ofd);
1304 }
1305 } else {
1306 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1)
1307 return -1;
1308 op_close = GETSYSCALL(host, CLOSE);
1309 rv = op_close(fd);
1310 }
1311
1312 return rv;
1313 }
1314
1315 /*
1316 * write cannot issue a standard debug printf due to recursion
1317 */
1318 ssize_t
1319 write(int fd, const void *buf, size_t blen)
1320 {
1321 ssize_t (*op_write)(int, const void *, size_t);
1322
1323 if (fd_isrump(fd)) {
1324 fd = fd_host2rump(fd);
1325 op_write = GETSYSCALL(rump, WRITE);
1326 } else {
1327 op_write = GETSYSCALL(host, WRITE);
1328 }
1329
1330 return op_write(fd, buf, blen);
1331 }
1332
1333 /*
1334 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
1335 * many programs do that. dup2 of a rump kernel fd to another value
1336 * not >= fdoff is an error.
1337 *
1338 * Note: cannot rump2host newd, because it is often hardcoded.
1339 */
1340 int
1341 dup2(int oldd, int newd)
1342 {
1343 int (*host_dup2)(int, int);
1344 int rv;
1345
1346 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
1347
1348 if (fd_isrump(oldd)) {
1349 int (*op_close)(int) = GETSYSCALL(host, CLOSE);
1350
1351 /* only allow fd 0-2 for cross-kernel dup */
1352 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) {
1353 errno = EBADF;
1354 return -1;
1355 }
1356
1357 /* regular dup2? */
1358 if (fd_isrump(newd)) {
1359 newd = fd_host2rump(newd);
1360 rv = rump_sys_dup2(oldd, newd);
1361 return fd_rump2host(rv);
1362 }
1363
1364 /*
1365 * dup2 rump => host? just establish an
1366 * entry in the mapping table.
1367 */
1368 op_close(newd);
1369 setdup2(newd, fd_host2rump(oldd));
1370 rv = 0;
1371 } else {
1372 host_dup2 = syscalls[DUALCALL_DUP2].bs_host;
1373 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1)
1374 return -1;
1375 rv = host_dup2(oldd, newd);
1376 }
1377
1378 return rv;
1379 }
1380
/*
 * dup(2): delegate to dodup() with a minimum descriptor of 0.
 */
int
dup(int oldd)
{
	const int lowest = 0;

	return dodup(oldd, lowest);
}
1387
1388 pid_t
1389 fork()
1390 {
1391 pid_t rv;
1392
1393 DPRINTF(("fork\n"));
1394
1395 rv = rumpclient__dofork(host_fork);
1396
1397 DPRINTF(("fork returns %d\n", rv));
1398 return rv;
1399 }
1400 /* we do not have the luxury of not requiring a stackframe */
1401 __strong_alias(__vfork14,fork);
1402
/*
 * daemon(3) interposer: bracket the host's daemon() with
 * rumpclient_prefork() before and rumpclient_fork_init() after.
 * Returns 0 on success, -1 if any of the three steps fails.
 *
 * NOTE(review): on host_daemon() failure the state returned by
 * rumpclient_prefork() is simply dropped -- presumably it should be
 * released; confirm against the rumpclient API.
 */
int
daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *forkstate;

	forkstate = rumpclient_prefork();
	if (forkstate == NULL)
		return -1;

	if (host_daemon(nochdir, noclose) == -1)
		return -1;

	return rumpclient_fork_init(forkstate) == -1 ? -1 : 0;
}
1419
1420 int
1421 execve(const char *path, char *const argv[], char *const envp[])
1422 {
1423 char buf[128];
1424 char *dup2str;
1425 const char *pwdinrumpstr;
1426 char **newenv;
1427 size_t nelem;
1428 int rv, sverrno;
1429 int bonus = 2, i = 0;
1430
1431 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u",
1432 dup2vec[0], dup2vec[1], dup2vec[2]);
1433 dup2str = strdup(buf);
1434 if (dup2str == NULL) {
1435 errno = ENOMEM;
1436 return -1;
1437 }
1438
1439 if (pwdinrump) {
1440 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true";
1441 bonus++;
1442 } else {
1443 pwdinrumpstr = NULL;
1444 }
1445
1446 for (nelem = 0; envp && envp[nelem]; nelem++)
1447 continue;
1448 newenv = malloc(sizeof(*newenv) * (nelem+bonus));
1449 if (newenv == NULL) {
1450 free(dup2str);
1451 errno = ENOMEM;
1452 return -1;
1453 }
1454 memcpy(newenv, envp, nelem*sizeof(*newenv));
1455 newenv[nelem+i] = dup2str;
1456 i++;
1457
1458 if (pwdinrumpstr) {
1459 newenv[nelem+i] = __UNCONST(pwdinrumpstr);
1460 i++;
1461 }
1462 newenv[nelem+i] = NULL;
1463 _DIAGASSERT(i < bonus);
1464
1465 rv = rumpclient_exec(path, argv, newenv);
1466
1467 _DIAGASSERT(rv != 0);
1468 sverrno = errno;
1469 free(newenv);
1470 free(dup2str);
1471 errno = sverrno;
1472 return rv;
1473 }
1474
1475 /*
1476 * select is done by calling poll.
1477 */
1478 int
1479 REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
1480 struct timeval *timeout)
1481 {
1482 struct pollfd *pfds;
1483 struct timespec ts, *tsp = NULL;
1484 nfds_t realnfds;
1485 int i, j;
1486 int rv, incr;
1487
1488 DPRINTF(("select\n"));
1489
1490 /*
1491 * Well, first we must scan the fds to figure out how many
1492 * fds there really are. This is because up to and including
1493 * nb5 poll() silently refuses nfds > process_maxopen_fds.
1494 * Seems to be fixed in current, thank the maker.
1495 * god damn cluster...bomb.
1496 */
1497
1498 for (i = 0, realnfds = 0; i < nfds; i++) {
1499 if (readfds && FD_ISSET(i, readfds)) {
1500 realnfds++;
1501 continue;
1502 }
1503 if (writefds && FD_ISSET(i, writefds)) {
1504 realnfds++;
1505 continue;
1506 }
1507 if (exceptfds && FD_ISSET(i, exceptfds)) {
1508 realnfds++;
1509 continue;
1510 }
1511 }
1512
1513 if (realnfds) {
1514 pfds = calloc(realnfds, sizeof(*pfds));
1515 if (!pfds)
1516 return -1;
1517 } else {
1518 pfds = NULL;
1519 }
1520
1521 for (i = 0, j = 0; i < nfds; i++) {
1522 incr = 0;
1523 if (readfds && FD_ISSET(i, readfds)) {
1524 pfds[j].fd = i;
1525 pfds[j].events |= POLLIN;
1526 incr=1;
1527 }
1528 if (writefds && FD_ISSET(i, writefds)) {
1529 pfds[j].fd = i;
1530 pfds[j].events |= POLLOUT;
1531 incr=1;
1532 }
1533 if (exceptfds && FD_ISSET(i, exceptfds)) {
1534 pfds[j].fd = i;
1535 pfds[j].events |= POLLHUP|POLLERR;
1536 incr=1;
1537 }
1538 if (incr)
1539 j++;
1540 }
1541 assert(j == (int)realnfds);
1542
1543 if (timeout) {
1544 TIMEVAL_TO_TIMESPEC(timeout, &ts);
1545 tsp = &ts;
1546 }
1547 rv = REALPOLLTS(pfds, realnfds, tsp, NULL);
1548 /*
1549 * "If select() returns with an error the descriptor sets
1550 * will be unmodified"
1551 */
1552 if (rv < 0)
1553 goto out;
1554
1555 /*
1556 * zero out results (can't use FD_ZERO for the
1557 * obvious select-me-not reason). whee.
1558 *
1559 * We do this here since some software ignores the return
1560 * value of select, and hence if the timeout expires, it may
1561 * assume all input descriptors have activity.
1562 */
1563 for (i = 0; i < nfds; i++) {
1564 if (readfds)
1565 FD_CLR(i, readfds);
1566 if (writefds)
1567 FD_CLR(i, writefds);
1568 if (exceptfds)
1569 FD_CLR(i, exceptfds);
1570 }
1571 if (rv == 0)
1572 goto out;
1573
1574 /*
1575 * We have >0 fds with activity. Harvest the results.
1576 */
1577 for (i = 0; i < (int)realnfds; i++) {
1578 if (readfds) {
1579 if (pfds[i].revents & POLLIN) {
1580 FD_SET(pfds[i].fd, readfds);
1581 }
1582 }
1583 if (writefds) {
1584 if (pfds[i].revents & POLLOUT) {
1585 FD_SET(pfds[i].fd, writefds);
1586 }
1587 }
1588 if (exceptfds) {
1589 if (pfds[i].revents & (POLLHUP|POLLERR)) {
1590 FD_SET(pfds[i].fd, exceptfds);
1591 }
1592 }
1593 }
1594
1595 out:
1596 free(pfds);
1597 return rv;
1598 }
1599
/*
 * Count how many entries of a pollfd vector refer to rump kernel
 * descriptors and how many to host descriptors.  Entries with fd == -1
 * are not counted.  The counters are incremented, not reset, so the
 * caller must initialize them.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx = 0;
	int *counter;
	int fd;

	while (idx < nfds) {
		fd = fds[idx++].fd;
		if (fd == -1)
			continue;

		counter = fd_isrump(fd) ? rumpcall : hostcall;
		(*counter)++;
	}
}
1615
/*
 * Rewrite the fd member of every entry in a pollfd vector by running
 * it through fdadj.  All nfds entries are converted; events and
 * revents are left alone.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	struct pollfd *pfd;
	nfds_t remaining;

	for (pfd = fds, remaining = nfds; remaining > 0; pfd++, remaining--)
		pfd->fd = fdadj(pfd->fd);
}
1625
1626 /*
1627 * poll is easy as long as the call comes in the fds only in one
1628 * kernel. otherwise its quite tricky...
1629 */
1630 struct pollarg {
1631 struct pollfd *pfds;
1632 nfds_t nfds;
1633 const struct timespec *ts;
1634 const sigset_t *sigmask;
1635 int pipefd;
1636 int errnum;
1637 };
1638
1639 static void *
1640 hostpoll(void *arg)
1641 {
1642 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
1643 const sigset_t *);
1644 struct pollarg *parg = arg;
1645 intptr_t rv;
1646
1647 op_pollts = GETSYSCALL(host, POLLTS);
1648 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
1649 if (rv == -1)
1650 parg->errnum = errno;
1651 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
1652
1653 return (void *)(intptr_t)rv;
1654 }
1655
1656 int
1657 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
1658 const sigset_t *sigmask)
1659 {
1660 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
1661 const sigset_t *);
1662 int (*host_close)(int);
1663 int hostcall = 0, rumpcall = 0;
1664 pthread_t pt;
1665 nfds_t i;
1666 int rv;
1667
1668 DPRINTF(("poll\n"));
1669 checkpoll(fds, nfds, &hostcall, &rumpcall);
1670
1671 if (hostcall && rumpcall) {
1672 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
1673 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
1674 struct pollarg parg;
1675 uintptr_t lrv;
1676 int sverrno = 0, trv;
1677
1678 /*
1679 * ok, this is where it gets tricky. We must support
1680 * this since it's a very common operation in certain
1681 * types of software (telnet, netcat, etc). We allocate
1682 * two vectors and run two poll commands in separate
1683 * threads. Whichever returns first "wins" and the
1684 * other kernel's fds won't show activity.
1685 */
1686 rv = -1;
1687
1688 /* allocate full vector for O(n) joining after call */
1689 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
1690 if (!pfd_host)
1691 goto out;
1692 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
1693 if (!pfd_rump) {
1694 goto out;
1695 }
1696
1697 /*
1698 * then, open two pipes, one for notifications
1699 * to each kernel.
1700 *
1701 * At least the rump pipe should probably be
1702 * cached, along with the helper threads. This
1703 * should give a microbenchmark improvement (haven't
1704 * experienced a macro-level problem yet, though).
1705 */
1706 if ((rv = rump_sys_pipe(rpipe)) == -1) {
1707 sverrno = errno;
1708 }
1709 if (rv == 0 && (rv = pipe(hpipe)) == -1) {
1710 sverrno = errno;
1711 }
1712
1713 /* split vectors (or signal errors) */
1714 for (i = 0; i < nfds; i++) {
1715 int fd;
1716
1717 fds[i].revents = 0;
1718 if (fds[i].fd == -1) {
1719 pfd_host[i].fd = -1;
1720 pfd_rump[i].fd = -1;
1721 } else if (fd_isrump(fds[i].fd)) {
1722 pfd_host[i].fd = -1;
1723 fd = fd_host2rump(fds[i].fd);
1724 if (fd == rpipe[0] || fd == rpipe[1]) {
1725 fds[i].revents = POLLNVAL;
1726 if (rv != -1)
1727 rv++;
1728 }
1729 pfd_rump[i].fd = fd;
1730 pfd_rump[i].events = fds[i].events;
1731 } else {
1732 pfd_rump[i].fd = -1;
1733 fd = fds[i].fd;
1734 if (fd == hpipe[0] || fd == hpipe[1]) {
1735 fds[i].revents = POLLNVAL;
1736 if (rv != -1)
1737 rv++;
1738 }
1739 pfd_host[i].fd = fd;
1740 pfd_host[i].events = fds[i].events;
1741 }
1742 pfd_rump[i].revents = pfd_host[i].revents = 0;
1743 }
1744 if (rv) {
1745 goto out;
1746 }
1747
1748 pfd_host[nfds].fd = hpipe[0];
1749 pfd_host[nfds].events = POLLIN;
1750 pfd_rump[nfds].fd = rpipe[0];
1751 pfd_rump[nfds].events = POLLIN;
1752
1753 /*
1754 * then, create a thread to do host part and meanwhile
1755 * do rump kernel part right here
1756 */
1757
1758 parg.pfds = pfd_host;
1759 parg.nfds = nfds+1;
1760 parg.ts = ts;
1761 parg.sigmask = sigmask;
1762 parg.pipefd = rpipe[1];
1763 pthread_create(&pt, NULL, hostpoll, &parg);
1764
1765 op_pollts = GETSYSCALL(rump, POLLTS);
1766 lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
1767 sverrno = errno;
1768 write(hpipe[1], &rv, sizeof(rv));
1769 pthread_join(pt, (void *)&trv);
1770
1771 /* check who "won" and merge results */
1772 if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
1773 rv = trv;
1774
1775 for (i = 0; i < nfds; i++) {
1776 if (pfd_rump[i].fd != -1)
1777 fds[i].revents = pfd_rump[i].revents;
1778 }
1779 sverrno = parg.errnum;
1780 } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
1781 rv = trv;
1782
1783 for (i = 0; i < nfds; i++) {
1784 if (pfd_host[i].fd != -1)
1785 fds[i].revents = pfd_host[i].revents;
1786 }
1787 } else {
1788 rv = 0;
1789 }
1790
1791 out:
1792 host_close = GETSYSCALL(host, CLOSE);
1793 if (rpipe[0] != -1)
1794 rump_sys_close(rpipe[0]);
1795 if (rpipe[1] != -1)
1796 rump_sys_close(rpipe[1]);
1797 if (hpipe[0] != -1)
1798 host_close(hpipe[0]);
1799 if (hpipe[1] != -1)
1800 host_close(hpipe[1]);
1801 free(pfd_host);
1802 free(pfd_rump);
1803 errno = sverrno;
1804 } else {
1805 if (hostcall) {
1806 op_pollts = GETSYSCALL(host, POLLTS);
1807 } else {
1808 op_pollts = GETSYSCALL(rump, POLLTS);
1809 adjustpoll(fds, nfds, fd_host2rump);
1810 }
1811
1812 rv = op_pollts(fds, nfds, ts, sigmask);
1813 if (rumpcall)
1814 adjustpoll(fds, nfds, fd_rump2host_withdup);
1815 }
1816
1817 return rv;
1818 }
1819
1820 int
1821 poll(struct pollfd *fds, nfds_t nfds, int timeout)
1822 {
1823 struct timespec ts;
1824 struct timespec *tsp = NULL;
1825
1826 if (timeout != INFTIM) {
1827 ts.tv_sec = timeout / 1000;
1828 ts.tv_nsec = (timeout % 1000) * 1000*1000;
1829
1830 tsp = &ts;
1831 }
1832
1833 return REALPOLLTS(fds, nfds, tsp, NULL);
1834 }
1835
1836 int
1837 REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges,
1838 struct kevent *eventlist, size_t nevents,
1839 const struct timespec *timeout)
1840 {
1841 int (*op_kevent)(int, const struct kevent *, size_t,
1842 struct kevent *, size_t, const struct timespec *);
1843 const struct kevent *ev;
1844 size_t i;
1845
1846 /*
1847 * Check that we don't attempt to kevent rump kernel fd's.
1848 * That needs similar treatment to select/poll, but is slightly
1849 * trickier since we need to manage to different kq descriptors.
1850 * (TODO, in case you're wondering).
1851 */
1852 for (i = 0; i < nchanges; i++) {
1853 ev = &changelist[i];
1854 if (ev->filter == EVFILT_READ || ev->filter == EVFILT_WRITE ||
1855 ev->filter == EVFILT_VNODE) {
1856 if (fd_isrump((int)ev->ident)) {
1857 errno = ENOTSUP;
1858 return -1;
1859 }
1860 }
1861 }
1862
1863 op_kevent = GETSYSCALL(host, KEVENT);
1864 return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout);
1865 }
1866
1867 /*
1868 * mmapping from a rump kernel is not supported, so disallow it.
1869 */
1870 void *
1871 mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset)
1872 {
1873
1874 if (flags & MAP_FILE && fd_isrump(fd)) {
1875 errno = ENOSYS;
1876 return MAP_FAILED;
1877 }
1878 return host_mmap(addr, len, prot, flags, fd, offset);
1879 }
1880
1881 /*
1882 * these go to one or the other on a per-process configuration
1883 */
1884 int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t);
1885 int
1886 __sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp,
1887 const void *new, size_t newlen)
1888 {
1889 int (*op___sysctl)(const int *, unsigned int, void *, size_t *,
1890 const void *, size_t);
1891
1892 if (rumpsysctl) {
1893 op___sysctl = GETSYSCALL(rump, __SYSCTL);
1894 } else {
1895 op___sysctl = GETSYSCALL(host, __SYSCTL);
1896 /* we haven't inited yet */
1897 if (__predict_false(op___sysctl == NULL)) {
1898 op___sysctl = rumphijack_dlsym(RTLD_NEXT, "__sysctl");
1899 }
1900 }
1901
1902 return op___sysctl(name, namelen, old, oldlenp, new, newlen);
1903 }
1904
1905 /*
1906 * Rest are std type calls.
1907 */
1908
1909 FDCALL(int, bind, DUALCALL_BIND, \
1910 (int fd, const struct sockaddr *name, socklen_t namelen), \
1911 (int, const struct sockaddr *, socklen_t), \
1912 (fd, name, namelen))
1913
1914 FDCALL(int, connect, DUALCALL_CONNECT, \
1915 (int fd, const struct sockaddr *name, socklen_t namelen), \
1916 (int, const struct sockaddr *, socklen_t), \
1917 (fd, name, namelen))
1918
1919 FDCALL(int, getpeername, DUALCALL_GETPEERNAME, \
1920 (int fd, struct sockaddr *name, socklen_t *namelen), \
1921 (int, struct sockaddr *, socklen_t *), \
1922 (fd, name, namelen))
1923
1924 FDCALL(int, getsockname, DUALCALL_GETSOCKNAME, \
1925 (int fd, struct sockaddr *name, socklen_t *namelen), \
1926 (int, struct sockaddr *, socklen_t *), \
1927 (fd, name, namelen))
1928
1929 FDCALL(int, listen, DUALCALL_LISTEN, \
1930 (int fd, int backlog), \
1931 (int, int), \
1932 (fd, backlog))
1933
1934 FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM, \
1935 (int fd, void *buf, size_t len, int flags, \
1936 struct sockaddr *from, socklen_t *fromlen), \
1937 (int, void *, size_t, int, struct sockaddr *, socklen_t *), \
1938 (fd, buf, len, flags, from, fromlen))
1939
1940 FDCALL(ssize_t, sendto, DUALCALL_SENDTO, \
1941 (int fd, const void *buf, size_t len, int flags, \
1942 const struct sockaddr *to, socklen_t tolen), \
1943 (int, const void *, size_t, int, \
1944 const struct sockaddr *, socklen_t), \
1945 (fd, buf, len, flags, to, tolen))
1946
1947 FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG, \
1948 (int fd, struct msghdr *msg, int flags), \
1949 (int, struct msghdr *, int), \
1950 (fd, msg, flags))
1951
1952 FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG, \
1953 (int fd, const struct msghdr *msg, int flags), \
1954 (int, const struct msghdr *, int), \
1955 (fd, msg, flags))
1956
1957 FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT, \
1958 (int fd, int level, int optn, void *optval, socklen_t *optlen), \
1959 (int, int, int, void *, socklen_t *), \
1960 (fd, level, optn, optval, optlen))
1961
1962 FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT, \
1963 (int fd, int level, int optn, \
1964 const void *optval, socklen_t optlen), \
1965 (int, int, int, const void *, socklen_t), \
1966 (fd, level, optn, optval, optlen))
1967
1968 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \
1969 (int fd, int how), \
1970 (int, int), \
1971 (fd, how))
1972
/*
 * STUB(fun) names a wrapper.  With _FORTIFY_SOURCE enabled it expands
 * to __ssp_weak_name(fun) (defined at the top of this file as
 * _hijack_<fun>); otherwise it is the plain name.
 */
#if _FORTIFY_SOURCE > 0
#define STUB(fun) __ssp_weak_name(fun)
#else
#define STUB(fun) fun
#endif

#if _FORTIFY_SOURCE > 0
/*
 * Fortified builds only: readlink and getcwd wrappers that pass
 * straight through to the _sys_ entry points.
 */
ssize_t _sys_readlink(const char * __restrict, char * __restrict, size_t);
char *_sys_getcwd(char *, size_t);

ssize_t
STUB(readlink)(const char * __restrict path, char * __restrict buf,
	size_t bufsiz)
{

	return _sys_readlink(path, buf, bufsiz);
}

char *
STUB(getcwd)(char *buf, size_t size)
{

	return _sys_getcwd(buf, size);
}
#endif
1992
1993 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \
1994 (int fd, void *buf, size_t buflen), \
1995 (int, void *, size_t), \
1996 (fd, buf, buflen))
1997
1998 FDCALL(ssize_t, readv, DUALCALL_READV, \
1999 (int fd, const struct iovec *iov, int iovcnt), \
2000 (int, const struct iovec *, int), \
2001 (fd, iov, iovcnt))
2002
2003 FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD, \
2004 (int fd, void *buf, size_t nbytes, off_t offset), \
2005 (int, void *, size_t, off_t), \
2006 (fd, buf, nbytes, offset))
2007
2008 FDCALL(ssize_t, preadv, DUALCALL_PREADV, \
2009 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \
2010 (int, const struct iovec *, int, off_t), \
2011 (fd, iov, iovcnt, offset))
2012
2013 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \
2014 (int fd, const struct iovec *iov, int iovcnt), \
2015 (int, const struct iovec *, int), \
2016 (fd, iov, iovcnt))
2017
2018 FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE, \
2019 (int fd, const void *buf, size_t nbytes, off_t offset), \
2020 (int, const void *, size_t, off_t), \
2021 (fd, buf, nbytes, offset))
2022
2023 FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV, \
2024 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \
2025 (int, const struct iovec *, int, off_t), \
2026 (fd, iov, iovcnt, offset))
2027
2028 FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \
2029 (int fd, struct stat *sb), \
2030 (int, struct stat *), \
2031 (fd, sb))
2032
2033 FDCALL(int, fstatvfs1, DUALCALL_FSTATVFS1, \
2034 (int fd, struct statvfs *buf, int flags), \
2035 (int, struct statvfs *, int), \
2036 (fd, buf, flags))
2037
2038 FDCALL(off_t, lseek, DUALCALL_LSEEK, \
2039 (int fd, off_t offset, int whence), \
2040 (int, off_t, int), \
2041 (fd, offset, whence))
2042 __strong_alias(_lseek,lseek);
2043
2044 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \
2045 (int fd, char *buf, size_t nbytes), \
2046 (int, char *, size_t), \
2047 (fd, buf, nbytes))
2048
2049 FDCALL(int, fchown, DUALCALL_FCHOWN, \
2050 (int fd, uid_t owner, gid_t group), \
2051 (int, uid_t, gid_t), \
2052 (fd, owner, group))
2053
2054 FDCALL(int, fchmod, DUALCALL_FCHMOD, \
2055 (int fd, mode_t mode), \
2056 (int, mode_t), \
2057 (fd, mode))
2058
2059 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \
2060 (int fd, off_t length), \
2061 (int, off_t), \
2062 (fd, length))
2063
2064 FDCALL(int, fsync, DUALCALL_FSYNC, \
2065 (int fd), \
2066 (int), \
2067 (fd))
2068
2069 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \
2070 (int fd, int how, off_t start, off_t length), \
2071 (int, int, off_t, off_t), \
2072 (fd, how, start, length))
2073
2074 FDCALL(int, futimes, DUALCALL_FUTIMES, \
2075 (int fd, const struct timeval *tv), \
2076 (int, const struct timeval *), \
2077 (fd, tv))
2078
2079 FDCALL(int, fchflags, DUALCALL_FCHFLAGS, \
2080 (int fd, u_long flags), \
2081 (int, u_long), \
2082 (fd, flags))
2083
2084 /*
2085 * path-based selectors
2086 */
2087
2088 PATHCALL(int, REALSTAT, DUALCALL_STAT, \
2089 (const char *path, struct stat *sb), \
2090 (const char *, struct stat *), \
2091 (path, sb))
2092
2093 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \
2094 (const char *path, struct stat *sb), \
2095 (const char *, struct stat *), \
2096 (path, sb))
2097
2098 PATHCALL(int, chown, DUALCALL_CHOWN, \
2099 (const char *path, uid_t owner, gid_t group), \
2100 (const char *, uid_t, gid_t), \
2101 (path, owner, group))
2102
2103 PATHCALL(int, lchown, DUALCALL_LCHOWN, \
2104 (const char *path, uid_t owner, gid_t group), \
2105 (const char *, uid_t, gid_t), \
2106 (path, owner, group))
2107
2108 PATHCALL(int, chmod, DUALCALL_CHMOD, \
2109 (const char *path, mode_t mode), \
2110 (const char *, mode_t), \
2111 (path, mode))
2112
2113 PATHCALL(int, lchmod, DUALCALL_LCHMOD, \
2114 (const char *path, mode_t mode), \
2115 (const char *, mode_t), \
2116 (path, mode))
2117
2118 PATHCALL(int, statvfs1, DUALCALL_STATVFS1, \
2119 (const char *path, struct statvfs *buf, int flags), \
2120 (const char *, struct statvfs *, int), \
2121 (path, buf, flags))
2122
2123 PATHCALL(int, unlink, DUALCALL_UNLINK, \
2124 (const char *path), \
2125 (const char *), \
2126 (path))
2127
2128 PATHCALL(int, symlink, DUALCALL_SYMLINK, \
2129 (const char *target, const char *path), \
2130 (const char *, const char *), \
2131 (target, path))
2132
2133 PATHCALL(ssize_t, readlink, DUALCALL_READLINK, \
2134 (const char *path, char *buf, size_t bufsiz), \
2135 (const char *, char *, size_t), \
2136 (path, buf, bufsiz))
2137
2138 PATHCALL(int, mkdir, DUALCALL_MKDIR, \
2139 (const char *path, mode_t mode), \
2140 (const char *, mode_t), \
2141 (path, mode))
2142
2143 PATHCALL(int, rmdir, DUALCALL_RMDIR, \
2144 (const char *path), \
2145 (const char *), \
2146 (path))
2147
2148 PATHCALL(int, utimes, DUALCALL_UTIMES, \
2149 (const char *path, const struct timeval *tv), \
2150 (const char *, const struct timeval *), \
2151 (path, tv))
2152
2153 PATHCALL(int, lutimes, DUALCALL_LUTIMES, \
2154 (const char *path, const struct timeval *tv), \
2155 (const char *, const struct timeval *), \
2156 (path, tv))
2157
2158 PATHCALL(int, chflags, DUALCALL_CHFLAGS, \
2159 (const char *path, u_long flags), \
2160 (const char *, u_long), \
2161 (path, flags))
2162
2163 PATHCALL(int, lchflags, DUALCALL_LCHFLAGS, \
2164 (const char *path, u_long flags), \
2165 (const char *, u_long), \
2166 (path, flags))
2167
2168 PATHCALL(int, truncate, DUALCALL_TRUNCATE, \
2169 (const char *path, off_t length), \
2170 (const char *, off_t), \
2171 (path, length))
2172
2173 PATHCALL(int, access, DUALCALL_ACCESS, \
2174 (const char *path, int mode), \
2175 (const char *, int), \
2176 (path, mode))
2177
2178 PATHCALL(int, REALMKNOD, DUALCALL_MKNOD, \
2179 (const char *path, mode_t mode, dev_t dev), \
2180 (const char *, mode_t, dev_t), \
2181 (path, mode, dev))
2182
2183 /*
2184 * Note: with mount the decisive parameter is the mount
2185 * destination directory. This is because we don't really know
2186 * about the "source" directory in a generic call (and besides,
2187 * it might not even exist, cf. nfs).
2188 */
2189 PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \
2190 (const char *type, const char *path, int flags, \
2191 void *data, size_t dlen), \
2192 (const char *, const char *, int, void *, size_t), \
2193 (type, path, flags, data, dlen))
2194
2195 PATHCALL(int, unmount, DUALCALL_UNMOUNT, \
2196 (const char *path, int flags), \
2197 (const char *, int), \
2198 (path, flags))
2199
2200 #if __NetBSD_Prereq__(5,99,48)
2201 PATHCALL(int, REALQUOTACTL, DUALCALL_QUOTACTL, \
2202 (const char *path, struct plistref *p), \
2203 (const char *, struct plistref *), \
2204 (path, p))
2205 #endif
2206
2207 PATHCALL(int, REALGETFH, DUALCALL_GETFH, \
2208 (const char *path, void *fhp, size_t *fh_size), \
2209 (const char *, void *, size_t *), \
2210 (path, fhp, fh_size))
2211
2212 /*
2213 * These act different on a per-process vfs configuration
2214 */
2215
2216 VFSCALL(VFSBIT_GETVFSSTAT, int, getvfsstat, DUALCALL_GETVFSSTAT, \
2217 (struct statvfs *buf, size_t buflen, int flags), \
2218 (struct statvfs *, size_t, int), \
2219 (buf, buflen, flags))
2220
2221 VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN, \
2222 (const void *fhp, size_t fh_size, int flags), \
2223 (const char *, size_t, int), \
2224 (fhp, fh_size, flags))
2225
2226 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT, \
2227 (const void *fhp, size_t fh_size, struct stat *sb), \
2228 (const char *, size_t, struct stat *), \
2229 (fhp, fh_size, sb))
2230
2231 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1, \
2232 (const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\
2233 (const char *, size_t, struct statvfs *, int), \
2234 (fhp, fh_size, sb, flgs))
2235
2236 /* finally, put nfssvc here. "keep the namespace clean" */
2237
2238 #include <nfs/rpcv2.h>
2239 #include <nfs/nfs.h>
2240
2241 int
2242 nfssvc(int flags, void *argstructp)
2243 {
2244 int (*op_nfssvc)(int, void *);
2245
2246 if (vfsbits & VFSBIT_NFSSVC){
2247 struct nfsd_args *nfsdargs;
2248
2249 /* massage the socket descriptor if necessary */
2250 if (flags == NFSSVC_ADDSOCK) {
2251 nfsdargs = argstructp;
2252 nfsdargs->sock = fd_host2rump(nfsdargs->sock);
2253 }
2254 op_nfssvc = GETSYSCALL(rump, NFSSVC);
2255 } else
2256 op_nfssvc = GETSYSCALL(host, NFSSVC);
2257
2258 return op_nfssvc(flags, argstructp);
2259 }
2260