hijack.c revision 1.136.2.1 1 /* $NetBSD: hijack.c,v 1.136.2.1 2024/08/22 19:34:37 martin Exp $ */
2
3 /*-
4 * Copyright (c) 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
 * XXX: rumphijack sort of works on glibc Linux, but it does not
 * work as well there as it does on NetBSD.
 * The split between autoconf HAVE_FOO tests and __NetBSD__ / __linux__
 * checks could be improved further.
33 */
34 #include <rump/rumpuser_port.h>
35
36 #if !defined(lint)
37 __RCSID("$NetBSD: hijack.c,v 1.136.2.1 2024/08/22 19:34:37 martin Exp $");
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/ioctl.h>
43 #include <sys/mman.h>
44 #include <sys/mount.h>
45 #include <sys/socket.h>
46 #include <sys/stat.h>
47 #include <sys/time.h>
48 #include <sys/uio.h>
49
50 #ifdef __NetBSD__
51 #include <sys/statvfs.h>
52 #endif
53
54 #ifdef HAVE_KQUEUE
55 #include <sys/event.h>
56 #endif
57
58 #ifdef __NetBSD__
59 #include <sys/quotactl.h>
60 #endif
61
62 #include <assert.h>
63 #include <dlfcn.h>
64 #include <err.h>
65 #include <errno.h>
66 #include <fcntl.h>
67 #include <poll.h>
68 #include <pthread.h>
69 #include <signal.h>
70 #include <stdarg.h>
71 #include <stdbool.h>
72 #include <stdint.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <time.h>
77 #include <unistd.h>
78
79 #include <rump/rumpclient.h>
80 #include <rump/rump_syscalls.h>
81
82 #include "hijack.h"
83
84 /*
85 * XXX: Consider autogenerating this, syscnames[] and syscalls[] with
86 * a DSL where the tool also checks the symbols exported by this library
87 * to make sure all relevant calls are accounted for.
88 */
/*
 * One enumerator per system call that exists both on the host and in
 * the rump kernel.  The values index syscalls[]; rcinit() refuses to
 * start if any value below DUALCALL__NUM lacks a syscnames[] entry.
 */
enum dualcall {
	/* write side, descriptor control */
	DUALCALL_WRITE,
	DUALCALL_WRITEV,
	DUALCALL_PWRITE,
	DUALCALL_PWRITEV,
	DUALCALL_IOCTL,
	DUALCALL_FCNTL,
	DUALCALL_FLOCK,

	/* sockets */
	DUALCALL_SOCKET,
	DUALCALL_ACCEPT,
#ifndef __linux__
	DUALCALL_PACCEPT,
#endif
	DUALCALL_BIND,
	DUALCALL_CONNECT,
	DUALCALL_GETPEERNAME,
	DUALCALL_GETSOCKNAME,
	DUALCALL_LISTEN,
	DUALCALL_RECVFROM,
	DUALCALL_RECVMSG,
	DUALCALL_SENDTO,
	DUALCALL_SENDMSG,
	DUALCALL_GETSOCKOPT,
	DUALCALL_SETSOCKOPT,
	DUALCALL_SHUTDOWN,

	/* read side, descriptor lifetime, polling */
	DUALCALL_READ,
	DUALCALL_READV,
	DUALCALL_PREAD,
	DUALCALL_PREADV,
	DUALCALL_DUP2,
	DUALCALL_CLOSE,
	DUALCALL_POLLTS,

#ifndef __linux__
	DUALCALL_STAT,
	DUALCALL_LSTAT,
	DUALCALL_FSTAT,
#endif

	/* file system operations */
	DUALCALL_CHMOD,
	DUALCALL_LCHMOD,
	DUALCALL_FCHMOD,
	DUALCALL_CHOWN,
	DUALCALL_LCHOWN,
	DUALCALL_FCHOWN,
	DUALCALL_OPEN,
	DUALCALL_CHDIR,
	DUALCALL_FCHDIR,
	DUALCALL_LSEEK,
	DUALCALL_UNLINK,
	DUALCALL_SYMLINK,
	DUALCALL_READLINK,
	DUALCALL_LINK,
	DUALCALL_RENAME,
	DUALCALL_MKDIR,
	DUALCALL_RMDIR,
	DUALCALL_UTIMES,
	DUALCALL_LUTIMES,
	DUALCALL_FUTIMES,
	DUALCALL_UTIMENSAT,
	DUALCALL_FUTIMENS,
	DUALCALL_TRUNCATE,
	DUALCALL_FTRUNCATE,
	DUALCALL_FSYNC,
	DUALCALL_ACCESS,

#ifndef __linux__
	DUALCALL___GETCWD,
	DUALCALL_GETDENTS,
	DUALCALL_MKNOD,
#endif

#ifdef __NetBSD__
	DUALCALL_GETFH,
	DUALCALL_FHOPEN,
	DUALCALL_FHSTAT,
	DUALCALL_FHSTATVFS1,
#endif

#ifdef HAVE_KQUEUE
	DUALCALL_KEVENT,
#endif

#ifdef __NetBSD__
	DUALCALL___SYSCTL,
	DUALCALL_MODCTL,
	DUALCALL_NFSSVC,
	DUALCALL_STATVFS1,
	DUALCALL_FSTATVFS1,
	DUALCALL_GETVFSSTAT,
	DUALCALL_MOUNT,
	DUALCALL_UNMOUNT,
#endif

#ifdef HAVE_FSYNC_RANGE
	DUALCALL_FSYNC_RANGE,
#endif

#ifdef HAVE_CHFLAGS
	DUALCALL_CHFLAGS,
	DUALCALL_LCHFLAGS,
	DUALCALL_FCHFLAGS,
#endif

#ifdef HAVE___QUOTACTL
	DUALCALL_QUOTACTL,
#endif
#ifdef __NetBSD__
	DUALCALL_LINKAT,
#endif
	DUALCALL_PATHCONF,
	DUALCALL_LPATHCONF,

	/* number of entries; must stay last */
	DUALCALL__NUM
};
178
/* Stringify "a" after macro expansion (the extra level forces the expansion). */
#define RSYS_STRING(a) __STRING(a)
/*
 * String form of whatever RUMP_SYS_RENAME_<a> expands to; used as the
 * rump-side symbol name in syscnames[].
 */
#define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
181
182 /*
183 * Would be nice to get this automatically in sync with libc.
184 * Also, this does not work for compat-using binaries (we should
185 * provide all previous interfaces, not just the current ones)
186 */
/*
 * REALFOO is the name of the host symbol that implements "foo" on the
 * build platform.  The names are stringified into syscnames[] and also
 * used as identifiers for the prototypes below.
 */
#if defined(__NetBSD__)

/* symbol names that depend on whether the target is older than 5.99.7 */
#if !__NetBSD_Prereq__(5,99,7)
#define REALPSELECT pselect
#define REALSELECT select
#define REALPOLLTS pollts
#define REALKEVENT kevent
#define REALSTAT __stat30
#define REALLSTAT __lstat30
#define REALFSTAT __fstat30
#define REALUTIMES utimes
#define REALLUTIMES lutimes
#define REALFUTIMES futimes
#define REALMKNOD mknod
#define REALFHSTAT __fhstat40
#else /* >= 5.99.7 */
#define REALPSELECT _sys___pselect50
#define REALSELECT _sys___select50
#define REALPOLLTS _sys___pollts50
#define REALKEVENT _sys___kevent50
#define REALSTAT __stat50
#define REALLSTAT __lstat50
#define REALFSTAT __fstat50
#define REALUTIMES __utimes50
#define REALLUTIMES __lutimes50
#define REALFUTIMES __futimes50
#define REALMKNOD __mknod50
#define REALFHSTAT __fhstat50
#endif /* < 5.99.7 */

/* symbol names common to all supported NetBSD versions */
#define REALREAD _sys_read
#define REALPREAD _sys_pread
#define REALPWRITE _sys_pwrite
#define REALGETDENTS __getdents30
#define REALMOUNT __mount50
#define REALGETFH __getfh30
#define REALFHOPEN __fhopen40
/* statvfs family: names depend on whether the target is older than 9.99.13 */
#if !__NetBSD_Prereq__(9,99,13)
#define REALSTATVFS1 statvfs1
#define REALFSTATVFS1 fstatvfs1
#define REALGETVFSSTAT getvfsstat
#define REALFHSTATVFS1 __fhstatvfs140
#else
#define REALSTATVFS1 __statvfs190
#define REALFSTATVFS1 __fstatvfs190
#define REALGETVFSSTAT __getvfsstat90
#define REALFHSTATVFS1 __fhstatvfs190
#endif
#define REALSOCKET __socket30

#define LSEEK_ALIAS _lseek
#define VFORK __vfork14

/* prototypes for the NetBSD-only host symbols named above */
int REALSTAT(const char *, struct stat *);
int REALLSTAT(const char *, struct stat *);
int REALFSTAT(int, struct stat *);
int REALMKNOD(const char *, mode_t, dev_t);
int REALGETDENTS(int, char *, size_t);

int __getcwd(char *, size_t);

#elif defined(__linux__) /* glibc, really */

/* on glibc the public names are used directly */
#define REALREAD read
#define REALPREAD pread
#define REALPWRITE pwrite
#define REALPSELECT pselect
#define REALSELECT select
#define REALPOLLTS ppoll
#define REALUTIMES utimes
#define REALLUTIMES lutimes
#define REALFUTIMES futimes
#define REALFHSTAT fhstat
#define REALSOCKET socket

#else /* !NetBSD && !linux */

#error platform not supported

#endif /* platform */
267
/*
 * Prototypes for the host entry points, spelled through the REALFOO
 * macros.  NOTE(review): several of these macros (e.g. REALKEVENT,
 * REALMOUNT, REALGETFH) are only defined in the NetBSD branch above, so
 * elsewhere these lines declare functions literally named REALFOO.
 */
int REALPSELECT(int, fd_set *, fd_set *, fd_set *, const struct timespec *,
    const sigset_t *);
int REALSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
int REALPOLLTS(struct pollfd *, nfds_t,
    const struct timespec *, const sigset_t *);
int REALKEVENT(int, const struct kevent *, size_t, struct kevent *, size_t,
    const struct timespec *);
ssize_t REALREAD(int, void *, size_t);
ssize_t REALPREAD(int, void *, size_t, off_t);
ssize_t REALPWRITE(int, const void *, size_t, off_t);
int REALUTIMES(const char *, const struct timeval [2]);
int REALLUTIMES(const char *, const struct timeval [2]);
int REALFUTIMES(int, const struct timeval [2]);
int REALMOUNT(const char *, const char *, int, void *, size_t);
int REALGETFH(const char *, void *, size_t *);
int REALFHOPEN(const void *, size_t, int);
int REALFHSTAT(const void *, size_t, struct stat *);
int REALSTATVFS1(const char *, struct statvfs *, int);
int REALFSTATVFS1(int, struct statvfs *, int);
int REALFHSTATVFS1(const void *, size_t, struct statvfs *, int);
int REALGETVFSSTAT(struct statvfs *, size_t, int);
int REALSOCKET(int, int, int);
290
/*
 * Symbol name table: for each dual call, the host symbol and the rump
 * kernel symbol that rcinit() resolves with dlsym().  Entries are looked
 * up by scm_callnum, so the order here does not have to match
 * enum dualcall.  S() stringifies a REALFOO macro after expansion.
 */
#define S(a) __STRING(a)
struct sysnames {
	enum dualcall scm_callnum;	/* index into syscalls[] */
	const char *scm_hostname;	/* host symbol name */
	const char *scm_rumpname;	/* rump kernel symbol name */
} syscnames[] = {
	{ DUALCALL_SOCKET, S(REALSOCKET), RSYS_NAME(SOCKET) },
	{ DUALCALL_ACCEPT, "accept", RSYS_NAME(ACCEPT) },
#ifndef __linux__
	{ DUALCALL_PACCEPT, "paccept", RSYS_NAME(PACCEPT) },
#endif
	{ DUALCALL_BIND, "bind", RSYS_NAME(BIND) },
	{ DUALCALL_CONNECT, "connect", RSYS_NAME(CONNECT) },
	{ DUALCALL_GETPEERNAME, "getpeername", RSYS_NAME(GETPEERNAME) },
	{ DUALCALL_GETSOCKNAME, "getsockname", RSYS_NAME(GETSOCKNAME) },
	{ DUALCALL_LISTEN, "listen", RSYS_NAME(LISTEN) },
	{ DUALCALL_RECVFROM, "recvfrom", RSYS_NAME(RECVFROM) },
	{ DUALCALL_RECVMSG, "recvmsg", RSYS_NAME(RECVMSG) },
	{ DUALCALL_SENDTO, "sendto", RSYS_NAME(SENDTO) },
	{ DUALCALL_SENDMSG, "sendmsg", RSYS_NAME(SENDMSG) },
	{ DUALCALL_GETSOCKOPT, "getsockopt", RSYS_NAME(GETSOCKOPT) },
	{ DUALCALL_SETSOCKOPT, "setsockopt", RSYS_NAME(SETSOCKOPT) },
	{ DUALCALL_SHUTDOWN, "shutdown", RSYS_NAME(SHUTDOWN) },
	{ DUALCALL_READ, S(REALREAD), RSYS_NAME(READ) },
	{ DUALCALL_READV, "readv", RSYS_NAME(READV) },
	{ DUALCALL_PREAD, S(REALPREAD), RSYS_NAME(PREAD) },
	{ DUALCALL_PREADV, "preadv", RSYS_NAME(PREADV) },
	{ DUALCALL_WRITE, "write", RSYS_NAME(WRITE) },
	{ DUALCALL_WRITEV, "writev", RSYS_NAME(WRITEV) },
	{ DUALCALL_PWRITE, S(REALPWRITE), RSYS_NAME(PWRITE) },
	{ DUALCALL_PWRITEV, "pwritev", RSYS_NAME(PWRITEV) },
	{ DUALCALL_IOCTL, "ioctl", RSYS_NAME(IOCTL) },
	{ DUALCALL_FCNTL, "fcntl", RSYS_NAME(FCNTL) },
	{ DUALCALL_FLOCK, "flock", RSYS_NAME(FLOCK) },
	{ DUALCALL_DUP2, "dup2", RSYS_NAME(DUP2) },
	{ DUALCALL_CLOSE, "close", RSYS_NAME(CLOSE) },
	{ DUALCALL_POLLTS, S(REALPOLLTS), RSYS_NAME(POLLTS) },
#ifndef __linux__
	{ DUALCALL_STAT, S(REALSTAT), RSYS_NAME(STAT) },
	{ DUALCALL_LSTAT, S(REALLSTAT), RSYS_NAME(LSTAT) },
	{ DUALCALL_FSTAT, S(REALFSTAT), RSYS_NAME(FSTAT) },
#endif
	{ DUALCALL_CHOWN, "chown", RSYS_NAME(CHOWN) },
	{ DUALCALL_LCHOWN, "lchown", RSYS_NAME(LCHOWN) },
	{ DUALCALL_FCHOWN, "fchown", RSYS_NAME(FCHOWN) },
	{ DUALCALL_CHMOD, "chmod", RSYS_NAME(CHMOD) },
	{ DUALCALL_LCHMOD, "lchmod", RSYS_NAME(LCHMOD) },
	{ DUALCALL_FCHMOD, "fchmod", RSYS_NAME(FCHMOD) },
	{ DUALCALL_UTIMES, S(REALUTIMES), RSYS_NAME(UTIMES) },
	{ DUALCALL_LUTIMES, S(REALLUTIMES), RSYS_NAME(LUTIMES) },
	{ DUALCALL_FUTIMES, S(REALFUTIMES), RSYS_NAME(FUTIMES) },
	{ DUALCALL_UTIMENSAT, "utimensat", RSYS_NAME(UTIMENSAT) },
	{ DUALCALL_FUTIMENS, "futimens", RSYS_NAME(FUTIMENS) },
	{ DUALCALL_OPEN, "open", RSYS_NAME(OPEN) },
	{ DUALCALL_CHDIR, "chdir", RSYS_NAME(CHDIR) },
	{ DUALCALL_FCHDIR, "fchdir", RSYS_NAME(FCHDIR) },
	{ DUALCALL_LSEEK, "lseek", RSYS_NAME(LSEEK) },
	{ DUALCALL_UNLINK, "unlink", RSYS_NAME(UNLINK) },
	{ DUALCALL_SYMLINK, "symlink", RSYS_NAME(SYMLINK) },
	{ DUALCALL_READLINK, "readlink", RSYS_NAME(READLINK) },
	{ DUALCALL_LINK, "link", RSYS_NAME(LINK) },
	{ DUALCALL_RENAME, "rename", RSYS_NAME(RENAME) },
	{ DUALCALL_MKDIR, "mkdir", RSYS_NAME(MKDIR) },
	{ DUALCALL_RMDIR, "rmdir", RSYS_NAME(RMDIR) },
	{ DUALCALL_TRUNCATE, "truncate", RSYS_NAME(TRUNCATE) },
	{ DUALCALL_FTRUNCATE, "ftruncate", RSYS_NAME(FTRUNCATE) },
	{ DUALCALL_FSYNC, "fsync", RSYS_NAME(FSYNC) },
	{ DUALCALL_ACCESS, "access", RSYS_NAME(ACCESS) },

#ifndef __linux__
	{ DUALCALL___GETCWD, "__getcwd", RSYS_NAME(__GETCWD) },
	{ DUALCALL_GETDENTS, S(REALGETDENTS),RSYS_NAME(GETDENTS) },
#endif

#ifndef __linux__
	{ DUALCALL_MKNOD, S(REALMKNOD), RSYS_NAME(MKNOD) },
#endif

#ifdef __NetBSD__
	{ DUALCALL_GETFH, S(REALGETFH), RSYS_NAME(GETFH) },
	{ DUALCALL_FHOPEN, S(REALFHOPEN), RSYS_NAME(FHOPEN) },
	{ DUALCALL_FHSTAT, S(REALFHSTAT), RSYS_NAME(FHSTAT) },
	{ DUALCALL_FHSTATVFS1, S(REALFHSTATVFS1),RSYS_NAME(FHSTATVFS1) },
#endif

#ifdef HAVE_KQUEUE
	{ DUALCALL_KEVENT, S(REALKEVENT), RSYS_NAME(KEVENT) },
#endif

#ifdef __NetBSD__
	{ DUALCALL___SYSCTL, "__sysctl", RSYS_NAME(__SYSCTL) },
	{ DUALCALL_MODCTL, "modctl", RSYS_NAME(MODCTL) },
#endif

#ifdef __NetBSD__
	{ DUALCALL_NFSSVC, "nfssvc", RSYS_NAME(NFSSVC) },
#endif

#ifdef __NetBSD__
	{ DUALCALL_STATVFS1, S(REALSTATVFS1),RSYS_NAME(STATVFS1) },
	{ DUALCALL_FSTATVFS1, S(REALFSTATVFS1),RSYS_NAME(FSTATVFS1) },
	{ DUALCALL_GETVFSSTAT, S(REALGETVFSSTAT),RSYS_NAME(GETVFSSTAT) },
#endif

#ifdef __NetBSD__
	{ DUALCALL_MOUNT, S(REALMOUNT), RSYS_NAME(MOUNT) },
	{ DUALCALL_UNMOUNT, "unmount", RSYS_NAME(UNMOUNT) },
#endif

#ifdef HAVE_FSYNC_RANGE
	{ DUALCALL_FSYNC_RANGE, "fsync_range", RSYS_NAME(FSYNC_RANGE) },
#endif

#ifdef HAVE_CHFLAGS
	{ DUALCALL_CHFLAGS, "chflags", RSYS_NAME(CHFLAGS) },
	{ DUALCALL_LCHFLAGS, "lchflags", RSYS_NAME(LCHFLAGS) },
	{ DUALCALL_FCHFLAGS, "fchflags", RSYS_NAME(FCHFLAGS) },
#endif /* HAVE_CHFLAGS */

#ifdef HAVE___QUOTACTL
	{ DUALCALL_QUOTACTL, "__quotactl", RSYS_NAME(__QUOTACTL) },
#endif /* HAVE___QUOTACTL */

#ifdef __NetBSD__
	{ DUALCALL_LINKAT, "linkat", RSYS_NAME(LINKAT) },
#endif
	{ DUALCALL_PATHCONF, "pathconf", RSYS_NAME(PATHCONF) },
	{ DUALCALL_LPATHCONF, "lpathconf", RSYS_NAME(LPATHCONF) },
};
#undef S
421
/*
 * Resolved entry points, indexed by enum dualcall and filled in by
 * rcinit().  The pointers are stored untyped; each user assigns them to
 * a function pointer of the appropriate type before calling.
 */
struct bothsys {
	void *bs_host;	/* host implementation */
	void *bs_rump;	/* rump kernel implementation */
} syscalls[DUALCALL__NUM];
/* GETSYSCALL(host|rump, FOO): fetch the given side of DUALCALL_FOO */
#define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which
427
/* host entry points resolved with dlsym(RTLD_NEXT, ...) in rcinit() */
static pid_t (*host_fork)(void);
static int (*host_daemon)(int, int);
static void * (*host_mmap)(void *, size_t, int, int, int, off_t);

/*
 * This tracks if our process is in a subdirectory of /rump.
 * It's preserved over exec.
 */
static bool pwdinrump;

/*
 * Classification returned by path_isrump():
 *   PATH_HOST         the path belongs to the host
 *   PATH_RUMP         the path is under the rump prefix (or is relative
 *                     while the cwd is in rump); callers strip the
 *                     prefix with path_host2rump()
 *   PATH_RUMPBLANKET  the path matches a blanket prefix; callers pass
 *                     it to the rump kernel unmodified
 */
enum pathtype { PATH_HOST, PATH_RUMP, PATH_RUMPBLANKET };

/* defined further down; needed by the debug helpers and call macros */
static bool fd_isrump(int);
static enum pathtype path_isrump(const char *);

/*
 * Offset added to rump kernel descriptors to form the value handed to
 * the application; descriptors at or above it are treated as rump ones.
 */
/* default FD_SETSIZE is 256 ==> default fdoff is 128 */
static int hijack_fdoff = FD_SETSIZE/2;
445
446 /*
447 * Maintain a mapping table for the usual dup2 suspects.
448 * Could use atomic ops to operate on dup2vec, but an application
449 * racing there is not well-defined, so don't bother.
450 */
/* note: you cannot change this without editing the env-passing code */
#define DUP2HIGH 2			/* highest host fd that can be tracked */
/* one slot per host fd 0..DUP2HIGH; 0 means "not mapped" */
static uint32_t dup2vec[DUP2HIGH+1];
#define DUP2BIT (1U<<31)		/* slot is in use */
#define DUP2ALIAS (1U<<30)		/* set by setdup2(), cleared by killdup2alias() */
#define DUP2FDMASK ((1U<<30)-1)		/* low bits hold the rump fd */
457
458 static bool
459 isdup2d(int fd)
460 {
461
462 return fd <= DUP2HIGH && fd >= 0 && dup2vec[fd] & DUP2BIT;
463 }
464
465 static int
466 mapdup2(int hostfd)
467 {
468
469 _DIAGASSERT(isdup2d(hostfd));
470 return dup2vec[hostfd] & DUP2FDMASK;
471 }
472
473 static int
474 unmapdup2(int rumpfd)
475 {
476 int i;
477
478 for (i = 0; i <= DUP2HIGH; i++) {
479 if (dup2vec[i] & DUP2BIT &&
480 (dup2vec[i] & DUP2FDMASK) == (unsigned)rumpfd)
481 return i;
482 }
483 return -1;
484 }
485
486 static void
487 setdup2(int hostfd, int rumpfd)
488 {
489
490 if (hostfd > DUP2HIGH) {
491 _DIAGASSERT(/*CONSTCOND*/0);
492 return;
493 }
494
495 dup2vec[hostfd] = DUP2BIT | DUP2ALIAS | rumpfd;
496 }
497
498 static void
499 clrdup2(int hostfd)
500 {
501
502 if (hostfd > DUP2HIGH) {
503 _DIAGASSERT(/*CONSTCOND*/0);
504 return;
505 }
506
507 dup2vec[hostfd] = 0;
508 }
509
510 static bool
511 killdup2alias(int rumpfd)
512 {
513 int hostfd;
514
515 if ((hostfd = unmapdup2(rumpfd)) == -1)
516 return false;
517
518 if (dup2vec[hostfd] & DUP2ALIAS) {
519 dup2vec[hostfd] &= ~DUP2ALIAS;
520 return true;
521 }
522 return false;
523 }
524
//#define DEBUGJACK
#ifdef DEBUGJACK
/* DPRINTF((fmt, ...)): note the double parentheses */
#define DPRINTF(x) mydprintf x

/*
 * printf-style debug output on stderr.  Nothing is printed while
 * descriptor 2 has a dup2 mapping.
 */
static void
mydprintf(const char *fmt, ...)
{
	va_list ap;

	if (!isdup2d(STDERR_FILENO)) {
		va_start(ap, fmt);
		vfprintf(stderr, fmt, ap);
		va_end(ap);
	}
}

/* Describe which side a descriptor belongs to, for debug output. */
static const char *
whichfd(int fd)
{

	if (fd == -1)
		return "-1";
	return fd_isrump(fd) ? "rump" : "host";
}

/* Describe which side a path belongs to, for debug output. */
static const char *
whichpath(const char *path)
{

	return path_isrump(path) ? "rump" : "host";
}

#else
#define DPRINTF(x)
#endif
566
/*
 * ATCALL: generate a wrapper for an *at()-style call.  The generated
 * function must have parameters named "fd" and "path".
 *
 * If fd is AT_FDCWD or the path is absolute, the path decides whether
 * the call goes to the rump kernel; otherwise the descriptor does.
 * For rump calls the descriptor is translated (unless it is AT_FDCWD)
 * and the rump prefix is stripped from the path.  Paths that matched a
 * blanket prefix (PATH_RUMPBLANKET) are passed through unmodified, as
 * in PATHCALL: stripping rumpprefixlen bytes from them would produce a
 * wrong path and could step past the end of the string.
 */
#define ATCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto;						\
	enum pathtype pt = PATH_HOST;					\
	bool isrump;							\
									\
	if (fd == AT_FDCWD || *path == '/') {				\
		pt = path_isrump(path);					\
		isrump = pt != PATH_HOST;				\
	} else {							\
		isrump = fd_isrump(fd);					\
	}								\
									\
	DPRINTF(("%s -> %d:%s (%s)\n", __STRING(name),			\
	    fd, path, isrump ? "rump" : "host"));			\
									\
	if (isrump) {							\
		fun = syscalls[rcname].bs_rump;				\
		if (fd != AT_FDCWD)					\
			fd = fd_host2rump(fd);				\
		if (pt != PATH_RUMPBLANKET)				\
			path = path_host2rump(path);			\
	} else {							\
		fun = syscalls[rcname].bs_host;				\
	}								\
	return fun vars;						\
}
593
/*
 * FDCALL: generate a wrapper for a call that operates on a descriptor.
 * The generated function must have a parameter named "fd".  Rump
 * descriptors are translated and sent to the rump kernel; everything
 * else goes to the host.
 */
#define FDCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto = syscalls[rcname].bs_host;			\
									\
	DPRINTF(("%s -> %d (%s)\n", __STRING(name), fd, whichfd(fd)));	\
	if (fd_isrump(fd)) {						\
		fun = syscalls[rcname].bs_rump;				\
		fd = fd_host2rump(fd);					\
	}								\
									\
	return fun vars;						\
}
609
/*
 * PATHCALL: generate a wrapper for a call that operates on a path.
 * The generated function must have a parameter named "path".  Paths
 * under the rump prefix have the prefix stripped; blanket matches are
 * passed to the rump kernel as they are; all others go to the host.
 */
#define PATHCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto = syscalls[rcname].bs_host;			\
	enum pathtype pt;						\
									\
	DPRINTF(("%s -> %s (%s)\n", __STRING(name), path,		\
	    whichpath(path)));						\
	pt = path_isrump(path);						\
	if (pt != PATH_HOST)						\
		fun = syscalls[rcname].bs_rump;				\
	if (pt == PATH_RUMP)						\
		path = path_host2rump(path);				\
									\
	return fun vars;						\
}
628
/*
 * VFSCALL: generate a wrapper for a call that is routed purely by
 * configuration: it goes to the rump kernel when "bit" is set in
 * vfsbits and to the host otherwise.
 */
#define VFSCALL(bit, type, name, rcname, args, proto, vars)		\
type name args								\
{									\
	type (*fun) proto;						\
									\
	DPRINTF(("%s (0x%x, 0x%x)\n", __STRING(name), bit, vfsbits));	\
	fun = (vfsbits & bit) ?						\
	    syscalls[rcname].bs_rump : syscalls[rcname].bs_host;	\
									\
	return fun vars;						\
}
643
644 /*
645 * These variables are set from the RUMPHIJACK string and control
 * which operations can produce rump kernel file descriptors.
647 * This should be easily extendable for future needs.
648 */
/* configuration used when RUMPHIJACK is not set in the environment */
#define RUMPHIJACK_DEFAULT "path=/rump,socket=all:nolocal"
/* per protocol family: true if socket() should create a rump socket */
static bool rumpsockets[PF_MAX];
/* path prefix that maps into the rump kernel, and its length */
static const char *rumpprefix;
static size_t rumpprefixlen;

/* protocol family names understood by the "socket" specifier */
static struct {
	int pf;
	const char *name;
} socketmap[] = {
	{ PF_LOCAL,	"local" },
	{ PF_INET,	"inet" },
#ifdef PF_LINK
	{ PF_LINK,	"link" },
#endif
#ifdef PF_OROUTE
	{ PF_OROUTE,	"oroute" },
#endif
	{ PF_ROUTE,	"route" },
	{ PF_INET6,	"inet6" },
#ifdef PF_MPLS
	{ PF_MPLS,	"mpls" },
#endif
	{ -1,		NULL }
};

/*
 * Parse the value of the "socket" specifier: a colon-separated list of
 * family names, each optionally prefixed with "no" to disable it.  A
 * leading "all" enables every family first.  The buffer is modified.
 * An unknown name terminates the process.
 */
static void
sockparser(char *buf)
{
	const size_t alllen = strlen("all");
	const size_t nolen = strlen("no");
	char *tok, *save = NULL;
	size_t pf;
	int m;

	/* if "all" is present, it must be specified first */
	if (strncmp(buf, "all", alllen) == 0) {
		for (pf = 0;
		    pf < sizeof(rumpsockets) / sizeof(rumpsockets[0]); pf++)
			rumpsockets[pf] = true;
		buf += alllen;
		if (*buf == ':')
			buf++;
	}

	tok = strtok_r(buf, ":", &save);
	while (tok != NULL) {
		bool enable = strncmp(tok, "no", nolen) != 0;

		if (!enable)
			tok += nolen;

		for (m = 0; socketmap[m].name != NULL; m++) {
			if (strcmp(tok, socketmap[m].name) == 0)
				break;
		}
		if (socketmap[m].name == NULL)
			errx(EXIT_FAILURE, "invalid socket specifier %s", tok);
		rumpsockets[socketmap[m].pf] = enable;

		tok = strtok_r(NULL, ":", &save);
	}
}
709
710 static void
711 pathparser(char *buf)
712 {
713
714 /* sanity-check */
715 if (*buf != '/')
716 errx(EXIT_FAILURE,
717 "hijack path specifier must begin with ``/''");
718 rumpprefixlen = strlen(buf);
719 if (rumpprefixlen < 2)
720 errx(EXIT_FAILURE, "invalid hijack prefix: %s", buf);
721 if (buf[rumpprefixlen-1] == '/' && strspn(buf, "/") != rumpprefixlen)
722 errx(EXIT_FAILURE, "hijack prefix may end in slash only if "
723 "pure slash, gave %s", buf);
724
725 if ((rumpprefix = strdup(buf)) == NULL)
726 err(EXIT_FAILURE, "strdup");
727 rumpprefixlen = strlen(rumpprefix);
728 }
729
/* blanket prefixes: paths starting with one of these go to rump as-is */
static struct blanket {
	const char *pfx;	/* prefix string (heap copy) */
	size_t len;		/* strlen(pfx) */
} *blanket;
static int nblanket;		/* number of valid entries in blanket[] */

/*
 * Parse the value of the "blanket" specifier: a colon-separated list
 * of absolute path prefixes, none of which may end in a slash.  The
 * buffer is modified.  An invalid prefix terminates the process.
 *
 * nblanket is set to the number of prefixes actually stored.  Empty
 * fields (a leading, trailing or doubled colon, or an empty value) are
 * skipped by strtok_r(), so counting colons would overstate the table
 * size and leave uninitialized entries for path_isrump() to trip over.
 */
static void
blanketparser(char *buf)
{
	struct blanket *tab;
	char *p, *l = NULL;
	int maxent, n;

	/* upper bound on the number of entries: colons + 1 */
	maxent = 1;
	for (p = buf; (p = strchr(p, ':')) != NULL; p++)
		maxent++;

	tab = calloc((size_t)maxent, sizeof(*tab));
	if (tab == NULL)
		err(EXIT_FAILURE, "alloc blanket %d", maxent);

	n = 0;
	for (p = strtok_r(buf, ":", &l); p; p = strtok_r(NULL, ":", &l)) {
		size_t len = strlen(p);

		if (len == 0 || *p != '/')
			errx(EXIT_FAILURE, "invalid blanket specifier %s", p);
		if (p[len-1] == '/')
			errx(EXIT_FAILURE, "invalid blanket specifier %s", p);

		tab[n].pfx = strdup(p);
		if (tab[n].pfx == NULL)
			err(EXIT_FAILURE, "strdup blanket");
		tab[n].len = len;
		n++;
	}

	/* publish only once the table is fully initialized */
	blanket = tab;
	nblanket = n;
}
762
/* groups of VFS-related calls that can be redirected to the rump kernel */
#define VFSBIT_NFSSVC		0x01
#define VFSBIT_GETVFSSTAT	0x02
#define VFSBIT_FHCALLS		0x04
/* currently enabled VFSBIT_* groups */
static unsigned vfsbits;

/* names understood by the "vfs" specifier */
static struct {
	int bit;
	const char *name;
} vfscalls[] = {
	{ VFSBIT_NFSSVC,	"nfssvc" },
	{ VFSBIT_GETVFSSTAT,	"getvfsstat" },
	{ VFSBIT_FHCALLS,	"fhcalls" },
	{ -1,			NULL }
};

/*
 * Parse the value of the "vfs" specifier: a colon-separated list of
 * group names, each optionally prefixed with "no" to disable it.  A
 * leading "all" enables every group first.  The buffer is modified.
 * An unknown name terminates the process.
 */
static void
vfsparser(char *buf)
{
	char *tok, *save = NULL;
	unsigned int allbits = 0;
	int k;

	/* build the full mask and sanity-check while we're at it */
	for (k = 0; vfscalls[k].name != NULL; k++) {
		if ((allbits & vfscalls[k].bit) != 0)
			errx(EXIT_FAILURE,
			    "problem exists between vi and chair");
		allbits |= vfscalls[k].bit;
	}

	/* if "all" is present, it must be specified first */
	if (strncmp(buf, "all", strlen("all")) == 0) {
		vfsbits = allbits;
		buf += strlen("all");
		if (*buf == ':')
			buf++;
	}

	tok = strtok_r(buf, ":", &save);
	while (tok != NULL) {
		bool disable = strncmp(tok, "no", strlen("no")) == 0;

		if (disable)
			tok += strlen("no");

		for (k = 0; vfscalls[k].name != NULL; k++) {
			if (strcmp(tok, vfscalls[k].name) == 0)
				break;
		}
		if (vfscalls[k].name == NULL)
			errx(EXIT_FAILURE, "invalid vfscall specifier %s", tok);

		if (disable)
			vfsbits &= ~vfscalls[k].bit;
		else
			vfsbits |= vfscalls[k].bit;

		tok = strtok_r(NULL, ":", &save);
	}
}
825
/* set by the "sysctl" specifier */
static bool rumpsysctl = false;

/*
 * Parse the optional value of the "sysctl" specifier.  No value at all
 * (buf == NULL) or a case-insensitive yes-word turns the flag on; "n"
 * or "no" turns it off; anything else terminates the process.
 */
static void
sysctlparser(char *buf)
{
	static const char *const yeswords[] = { "y", "yes", "yep", "tottakai" };
	static const char *const nowords[] = { "n", "no" };
	size_t w;

	if (buf == NULL) {
		rumpsysctl = true;
		return;
	}

	for (w = 0; w < sizeof(yeswords) / sizeof(yeswords[0]); w++) {
		if (strcasecmp(buf, yeswords[w]) == 0) {
			rumpsysctl = true;
			return;
		}
	}
	for (w = 0; w < sizeof(nowords) / sizeof(nowords[0]); w++) {
		if (strcasecmp(buf, nowords[w]) == 0) {
			rumpsysctl = false;
			return;
		}
	}

	errx(EXIT_FAILURE, "sysctl value should be y(es)/n(o), gave: %s", buf);
}
849
/* set by the "modctl" specifier */
static bool rumpmodctl = false;

/*
 * Parse the optional value of the "modctl" specifier.  No value at all
 * (buf == NULL) or a case-insensitive yes-word turns the flag on; "n"
 * or "no" turns it off; anything else terminates the process.
 */
static void
modctlparser(char *buf)
{
	static const char *const yeswords[] = { "y", "yes", "yep", "tottakai" };
	static const char *const nowords[] = { "n", "no" };
	size_t w;

	if (buf == NULL) {
		rumpmodctl = true;
		return;
	}

	for (w = 0; w < sizeof(yeswords) / sizeof(yeswords[0]); w++) {
		if (strcasecmp(buf, yeswords[w]) == 0) {
			rumpmodctl = true;
			return;
		}
	}
	for (w = 0; w < sizeof(nowords) / sizeof(nowords[0]); w++) {
		if (strcasecmp(buf, nowords[w]) == 0) {
			rumpmodctl = false;
			return;
		}
	}

	errx(EXIT_FAILURE, "modctl value should be y(es)/n(o), gave: %s", buf);
}
873
874 static void
875 fdoffparser(char *buf)
876 {
877 unsigned long fdoff;
878 char *ep;
879
880 if (*buf == '-') {
881 errx(EXIT_FAILURE, "fdoff must not be negative");
882 }
883 fdoff = strtoul(buf, &ep, 10);
884 if (*ep != '\0')
885 errx(EXIT_FAILURE, "invalid fdoff specifier \"%s\"", buf);
886 if (fdoff >= INT_MAX/2 || fdoff < 3)
887 errx(EXIT_FAILURE, "fdoff out of range");
888 hijack_fdoff = (int)fdoff;
889 }
890
/*
 * Top-level RUMPHIJACK specifiers, consulted by parsehijack().
 *   parsefn     handler; receives the text after '=' or NULL if the
 *               specifier was given without '='
 *   name        specifier keyword
 *   needvalues  if true, a specifier without '=' is rejected
 * The table ends at the entry whose parsefn is NULL.
 */
static struct {
	void (*parsefn)(char *);
	const char *name;
	bool needvalues;
} hijackparse[] = {
	{ sockparser, "socket", true },
	{ pathparser, "path", true },
	{ blanketparser, "blanket", true },
	{ vfsparser, "vfs", true },
	{ sysctlparser, "sysctl", false },
	{ modctlparser, "modctl", false },
	{ fdoffparser, "fdoff", true },
	{ NULL, NULL, false },
};
905
906 static void
907 parsehijack(char *hijack)
908 {
909 char *p, *p2, *l;
910 const char *hijackcopy;
911 bool nop2;
912 int i;
913
914 if ((hijackcopy = strdup(hijack)) == NULL)
915 err(EXIT_FAILURE, "strdup");
916
917 /* disable everything explicitly */
918 for (i = 0; i < PF_MAX; i++)
919 rumpsockets[i] = false;
920
921 for (p = strtok_r(hijack, ",", &l); p; p = strtok_r(NULL, ",", &l)) {
922 nop2 = false;
923 p2 = strchr(p, '=');
924 if (!p2) {
925 nop2 = true;
926 p2 = p + strlen(p);
927 }
928
929 for (i = 0; hijackparse[i].parsefn; i++) {
930 if (strncmp(hijackparse[i].name, p,
931 (size_t)(p2-p)) == 0) {
932 if (nop2 && hijackparse[i].needvalues)
933 errx(EXIT_FAILURE, "invalid hijack specifier: %s",
934 hijackcopy);
935 hijackparse[i].parsefn(nop2 ? NULL : p2+1);
936 break;
937 }
938 }
939
940 if (hijackparse[i].parsefn == NULL)
941 errx(EXIT_FAILURE,
942 "invalid hijack specifier name in %s", p);
943 }
944
945 }
946
947 static void __attribute__((__constructor__))
948 rcinit(void)
949 {
950 char buf[1024];
951 unsigned i, j;
952
953 host_fork = dlsym(RTLD_NEXT, "fork");
954 host_daemon = dlsym(RTLD_NEXT, "daemon");
955 if (host_mmap == NULL)
956 host_mmap = dlsym(RTLD_NEXT, "mmap");
957
958 /*
959 * In theory cannot print anything during lookups because
960 * we might not have the call vector set up. so, the errx()
961 * is a bit of a stretch, but it might work.
962 */
963
964 for (i = 0; i < DUALCALL__NUM; i++) {
965 /* build runtime O(1) access */
966 for (j = 0; j < __arraycount(syscnames); j++) {
967 if (syscnames[j].scm_callnum == i)
968 break;
969 }
970
971 if (j == __arraycount(syscnames))
972 errx(EXIT_FAILURE,
973 "rumphijack error: syscall pos %d missing", i);
974
975 syscalls[i].bs_host = dlsym(RTLD_NEXT,
976 syscnames[j].scm_hostname);
977 if (syscalls[i].bs_host == NULL)
978 errx(EXIT_FAILURE, "hostcall %s not found!",
979 syscnames[j].scm_hostname);
980
981 syscalls[i].bs_rump = dlsym(RTLD_NEXT,
982 syscnames[j].scm_rumpname);
983 if (syscalls[i].bs_rump == NULL)
984 errx(EXIT_FAILURE, "rumpcall %s not found!",
985 syscnames[j].scm_rumpname);
986 #if 0
987 fprintf(stderr, "%s %p %s %p\n",
988 syscnames[j].scm_hostname, syscalls[i].bs_host,
989 syscnames[j].scm_rumpname, syscalls[i].bs_rump);
990 #endif
991 }
992
993 if (rumpclient_init() == -1)
994 err(EXIT_FAILURE, "rumpclient init");
995
996 /* check which syscalls we're supposed to hijack */
997 if (getenv_r("RUMPHIJACK", buf, sizeof(buf)) == -1) {
998 strcpy(buf, RUMPHIJACK_DEFAULT);
999 }
1000 parsehijack(buf);
1001
1002 /* set client persistence level */
1003 if (getenv_r("RUMPHIJACK_RETRYCONNECT", buf, sizeof(buf)) != -1) {
1004 if (strcmp(buf, "die") == 0)
1005 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_DIE);
1006 else if (strcmp(buf, "inftime") == 0)
1007 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_INFTIME);
1008 else if (strcmp(buf, "once") == 0)
1009 rumpclient_setconnretry(RUMPCLIENT_RETRYCONN_ONCE);
1010 else {
1011 time_t timeout;
1012 char *ep;
1013
1014 timeout = (time_t)strtoll(buf, &ep, 10);
1015 if (timeout <= 0 || ep != buf + strlen(buf))
1016 errx(EXIT_FAILURE,
1017 "RUMPHIJACK_RETRYCONNECT must be "
1018 "keyword or integer, got: %s", buf);
1019
1020 rumpclient_setconnretry(timeout);
1021 }
1022 }
1023
1024 if (getenv_r("RUMPHIJACK__DUP2INFO", buf, sizeof(buf)) == 0) {
1025 if (sscanf(buf, "%u,%u,%u",
1026 &dup2vec[0], &dup2vec[1], &dup2vec[2]) != 3) {
1027 warnx("invalid dup2mask: %s", buf);
1028 memset(dup2vec, 0, sizeof(dup2vec));
1029 }
1030 unsetenv("RUMPHIJACK__DUP2INFO");
1031 }
1032 if (getenv_r("RUMPHIJACK__PWDINRUMP", buf, sizeof(buf)) == 0) {
1033 pwdinrump = true;
1034 unsetenv("RUMPHIJACK__PWDINRUMP");
1035 }
1036 }
1037
1038 static int
1039 fd_rump2host(int fd)
1040 {
1041
1042 if (fd == -1)
1043 return fd;
1044 return fd + hijack_fdoff;
1045 }
1046
1047 static int
1048 fd_rump2host_withdup(int fd)
1049 {
1050 int hfd;
1051
1052 _DIAGASSERT(fd != -1);
1053 hfd = unmapdup2(fd);
1054 if (hfd != -1) {
1055 _DIAGASSERT(hfd <= DUP2HIGH);
1056 return hfd;
1057 }
1058 return fd_rump2host(fd);
1059 }
1060
1061 static int
1062 fd_host2rump(int fd)
1063 {
1064 if (!isdup2d(fd))
1065 return fd - hijack_fdoff;
1066 else
1067 return mapdup2(fd);
1068 }
1069
1070 static bool
1071 fd_isrump(int fd)
1072 {
1073
1074 return isdup2d(fd) || fd >= hijack_fdoff;
1075 }
1076
/* assert that _fd_ is a rump descriptor (same condition as fd_isrump()) */
#define assertfd(_fd_) assert(isdup2d(_fd_) || (_fd_) >= hijack_fdoff)
1078
1079 static enum pathtype
1080 path_isrump(const char *path)
1081 {
1082 size_t plen;
1083 int i;
1084
1085 if (rumpprefix == NULL && nblanket == 0)
1086 return PATH_HOST;
1087
1088 if (*path == '/') {
1089 plen = strlen(path);
1090 if (rumpprefix && plen >= rumpprefixlen) {
1091 if (strncmp(path, rumpprefix, rumpprefixlen) == 0
1092 && (plen == rumpprefixlen
1093 || *(path + rumpprefixlen) == '/')) {
1094 return PATH_RUMP;
1095 }
1096 }
1097 for (i = 0; i < nblanket; i++) {
1098 if (strncmp(path, blanket[i].pfx, blanket[i].len) == 0)
1099 return PATH_RUMPBLANKET;
1100 }
1101
1102 return PATH_HOST;
1103 } else {
1104 return pwdinrump ? PATH_RUMP : PATH_HOST;
1105 }
1106 }
1107
1108 static const char *rootpath = "/";
1109 static const char *
1110 path_host2rump(const char *path)
1111 {
1112 const char *rv;
1113
1114 if (*path == '/') {
1115 rv = path + rumpprefixlen;
1116 if (*rv == '\0')
1117 rv = rootpath;
1118 } else {
1119 rv = path;
1120 }
1121
1122 return rv;
1123 }
1124
1125 static int
1126 dodup(int oldd, int minfd)
1127 {
1128 int (*op_fcntl)(int, int, ...);
1129 int newd;
1130 int isrump;
1131
1132 DPRINTF(("dup -> %d (minfd %d)\n", oldd, minfd));
1133 if (fd_isrump(oldd)) {
1134 op_fcntl = GETSYSCALL(rump, FCNTL);
1135 oldd = fd_host2rump(oldd);
1136 if (minfd >= hijack_fdoff)
1137 minfd -= hijack_fdoff;
1138 isrump = 1;
1139 } else {
1140 if (minfd >= hijack_fdoff) {
1141 errno = EINVAL;
1142 return -1;
1143 }
1144 op_fcntl = GETSYSCALL(host, FCNTL);
1145 isrump = 0;
1146 }
1147
1148 newd = op_fcntl(oldd, F_DUPFD, minfd);
1149
1150 if (isrump)
1151 newd = fd_rump2host(newd);
1152 DPRINTF(("dup <- %d\n", newd));
1153
1154 return newd;
1155 }
1156
1157 /*
1158 * Check that host fd value does not exceed fdoffset and if necessary
1159 * dup the file descriptor so that it doesn't collide with the dup2mask.
1160 */
1161 static int
1162 fd_host2host(int fd)
1163 {
1164 int (*op_fcntl)(int, int, ...) = GETSYSCALL(host, FCNTL);
1165 int (*op_close)(int) = GETSYSCALL(host, CLOSE);
1166 int ofd, i;
1167
1168 if (fd >= hijack_fdoff) {
1169 op_close(fd);
1170 errno = ENFILE;
1171 return -1;
1172 }
1173
1174 for (i = 1; isdup2d(fd); i++) {
1175 ofd = fd;
1176 fd = op_fcntl(ofd, F_DUPFD, i);
1177 op_close(ofd);
1178 }
1179
1180 return fd;
1181 }
1182
1183 int
1184 open(const char *path, int flags, ...)
1185 {
1186 int (*op_open)(const char *, int, ...);
1187 bool isrump;
1188 va_list ap;
1189 enum pathtype pt;
1190 int fd, rfd;
1191
1192 DPRINTF(("open -> %s (%s)", path, whichpath(path)));
1193
1194 if ((pt = path_isrump(path)) != PATH_HOST) {
1195 if (pt == PATH_RUMP)
1196 path = path_host2rump(path);
1197 op_open = GETSYSCALL(rump, OPEN);
1198 isrump = true;
1199 } else {
1200 op_open = GETSYSCALL(host, OPEN);
1201 isrump = false;
1202 }
1203
1204 va_start(ap, flags);
1205 fd = op_open(path, flags, va_arg(ap, mode_t));
1206 va_end(ap);
1207
1208 if (isrump)
1209 rfd = fd_rump2host(fd);
1210 else
1211 rfd = fd_host2host(fd);
1212
1213 DPRINTF((" <- %d/%d (%s)\n", fd, rfd, whichfd(rfd)));
1214 return rfd;
1215 }
1216
1217 int
1218 chdir(const char *path)
1219 {
1220 int (*op_chdir)(const char *);
1221 enum pathtype pt;
1222 int rv;
1223
1224 if ((pt = path_isrump(path)) != PATH_HOST) {
1225 op_chdir = GETSYSCALL(rump, CHDIR);
1226 if (pt == PATH_RUMP)
1227 path = path_host2rump(path);
1228 } else {
1229 op_chdir = GETSYSCALL(host, CHDIR);
1230 }
1231
1232 rv = op_chdir(path);
1233 if (rv == 0)
1234 pwdinrump = pt != PATH_HOST;
1235
1236 return rv;
1237 }
1238
1239 int
1240 fchdir(int fd)
1241 {
1242 int (*op_fchdir)(int);
1243 bool isrump;
1244 int rv;
1245
1246 if (fd_isrump(fd)) {
1247 op_fchdir = GETSYSCALL(rump, FCHDIR);
1248 isrump = true;
1249 fd = fd_host2rump(fd);
1250 } else {
1251 op_fchdir = GETSYSCALL(host, FCHDIR);
1252 isrump = false;
1253 }
1254
1255 rv = op_fchdir(fd);
1256 if (rv == 0) {
1257 pwdinrump = isrump;
1258 }
1259
1260 return rv;
1261 }
1262
#ifndef __linux__
/*
 * __getcwd interposer.
 *
 * When the cwd is on the host the host call is used as is.  When the
 * cwd is in the rump kernel and no prefix is configured (blanket
 * mode), the rump kernel's answer is returned without a prefix.
 * Otherwise the rump kernel writes its answer into the buffer after a
 * gap and rumpprefix is copied in front of it.  The return value is
 * the rump kernel's, not adjusted for the prefix.
 */
int
__getcwd(char *bufp, size_t len)
{
	int (*op___getcwd)(char *, size_t);
	size_t prefixgap;
	bool iamslash;
	int rv;

	if (!pwdinrump) {
		op___getcwd = GETSYSCALL(host, __GETCWD);
		return op___getcwd(bufp, len);
	}

	if (rumpprefix == NULL) {
		/* assume blanket.  we can't provide a prefix here */
		op___getcwd = GETSYSCALL(rump, __GETCWD);
		return op___getcwd(bufp, len);
	}

	/*
	 * A prefix ending in '/' overlaps the leading '/' of the rump
	 * path by one byte (``//+path''); otherwise the two are simply
	 * concatenated (``/pfx+/path'').
	 */
	iamslash = rumpprefix[rumpprefixlen-1] == '/';
	prefixgap = iamslash ? rumpprefixlen - 1 : rumpprefixlen;
	if (len <= prefixgap) {
		errno = ERANGE;
		return -1;
	}

	op___getcwd = GETSYSCALL(rump, __GETCWD);
	rv = op___getcwd(bufp + prefixgap, len - prefixgap);
	if (rv == -1)
		return rv;

	memcpy(bufp, rumpprefix, rumpprefixlen);

	if (rv != 2) {
		/* non-root cwd: make sure the joining slash is in place */
		bufp[prefixgap] = '/';
	} else if (!iamslash) {
		/* rump cwd is the root: report the bare prefix */
		bufp[rumpprefixlen] = '\0';
	}

	return rv;
}
#endif
1314
1315 static int
1316 moveish(const char *from, const char *to,
1317 int (*rump_op)(const char *, const char *),
1318 int (*host_op)(const char *, const char *))
1319 {
1320 int (*op)(const char *, const char *);
1321 enum pathtype ptf, ptt;
1322
1323 if ((ptf = path_isrump(from)) != PATH_HOST) {
1324 if ((ptt = path_isrump(to)) == PATH_HOST) {
1325 errno = EXDEV;
1326 return -1;
1327 }
1328
1329 if (ptf == PATH_RUMP)
1330 from = path_host2rump(from);
1331 if (ptt == PATH_RUMP)
1332 to = path_host2rump(to);
1333 op = rump_op;
1334 } else {
1335 if (path_isrump(to) != PATH_HOST) {
1336 errno = EXDEV;
1337 return -1;
1338 }
1339
1340 op = host_op;
1341 }
1342
1343 return op(from, to);
1344 }
1345
#ifdef __NetBSD__
/*
 * linkat(2) interposer.  Only the form equivalent to link(2) is
 * supported: both directory descriptors must be AT_FDCWD and flags
 * must be exactly AT_SYMLINK_FOLLOW.  Anything else fails with -1 and
 * errno set to ENOSYS.
 */
int
linkat(int fromfd, const char *from, int tofd, const char *to, int flags)
{
	if (fromfd != AT_FDCWD || tofd != AT_FDCWD
	    || flags != AT_SYMLINK_FOLLOW) {
		/* report failure the syscall way, not as a return value */
		errno = ENOSYS;
		return -1;
	}

	return moveish(from, to,
	    GETSYSCALL(rump, LINK), GETSYSCALL(host, LINK));
}
#endif
1358
1359 static long
1360 do_pathconf(const char *path, int name, int link)
1361 {
1362 long (*op_pathconf)(const char *, int);
1363 enum pathtype pt;
1364
1365 if ((pt = path_isrump(path)) != PATH_HOST) {
1366 op_pathconf = link ?
1367 GETSYSCALL(rump, LPATHCONF) :
1368 GETSYSCALL(rump, PATHCONF);
1369 if (pt == PATH_RUMP)
1370 path = path_host2rump(path);
1371 } else {
1372 op_pathconf = link ?
1373 GETSYSCALL(host, LPATHCONF) :
1374 GETSYSCALL(host, PATHCONF);
1375 }
1376
1377 return op_pathconf(path, name);
1378 }
1379
/* lpathconf(2) interposer: do_pathconf() with the link variant selected. */
long
lpathconf(const char *path, int name)
{
	const int uselink = 1;

	return do_pathconf(path, name, uselink);
}
1385
/* pathconf(2) interposer: do_pathconf() with the non-link variant selected. */
long
pathconf(const char *path, int name)
{
	const int uselink = 0;

	return do_pathconf(path, name, uselink);
}
1391
1392 int
1393 link(const char *from, const char *to)
1394 {
1395 return moveish(from, to,
1396 GETSYSCALL(rump, LINK), GETSYSCALL(host, LINK));
1397 }
1398
1399 int
1400 rename(const char *from, const char *to)
1401 {
1402 return moveish(from, to,
1403 GETSYSCALL(rump, RENAME), GETSYSCALL(host, RENAME));
1404 }
1405
1406 int
1407 REALSOCKET(int domain, int type, int protocol)
1408 {
1409 int (*op_socket)(int, int, int);
1410 int fd, rfd;
1411 bool isrump;
1412
1413 isrump = domain < PF_MAX && rumpsockets[domain];
1414
1415 if (isrump)
1416 op_socket = GETSYSCALL(rump, SOCKET);
1417 else
1418 op_socket = GETSYSCALL(host, SOCKET);
1419 fd = op_socket(domain, type, protocol);
1420
1421 if (isrump)
1422 rfd = fd_rump2host(fd);
1423 else
1424 rfd = fd_host2host(fd);
1425 DPRINTF(("socket <- %d/%d (%s)\n", fd, rfd, whichfd(rfd)));
1426
1427 return rfd;
1428 }
1429
1430 int
1431 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
1432 {
1433 int (*op_accept)(int, struct sockaddr *, socklen_t *);
1434 int fd, rfd;
1435 bool isrump;
1436
1437 isrump = fd_isrump(s);
1438
1439 DPRINTF(("accept -> %d", s));
1440 if (isrump) {
1441 op_accept = GETSYSCALL(rump, ACCEPT);
1442 s = fd_host2rump(s);
1443 } else {
1444 op_accept = GETSYSCALL(host, ACCEPT);
1445 }
1446 fd = op_accept(s, addr, addrlen);
1447 if (fd != -1 && isrump)
1448 rfd = fd_rump2host(fd);
1449 else
1450 rfd = fd_host2host(fd);
1451
1452 DPRINTF((" <- %d/%d (%s)\n", fd, rfd, whichfd(rfd)));
1453
1454 return rfd;
1455 }
1456
#ifndef __linux__
/*
 * paccept(2) interposer; same routing and descriptor translation as
 * accept(), with the signal mask and flags passed through unchanged.
 */
int
paccept(int s, struct sockaddr *addr, socklen_t *addrlen,
    const sigset_t * restrict sigmask, int flags)
{
	int (*op_paccept)(int, struct sockaddr *, socklen_t *,
	    const sigset_t * restrict, int);
	const bool isrump = fd_isrump(s);
	int fd, rfd;

	DPRINTF(("paccept -> %d", s));
	if (!isrump) {
		op_paccept = GETSYSCALL(host, PACCEPT);
	} else {
		op_paccept = GETSYSCALL(rump, PACCEPT);
		s = fd_host2rump(s);
	}

	fd = op_paccept(s, addr, addrlen, sigmask, flags);
	rfd = (isrump && fd != -1) ? fd_rump2host(fd) : fd_host2host(fd);

	DPRINTF((" <- %d/%d (%s)\n", fd, rfd, whichfd(rfd)));

	return rfd;
}
#endif
1487
1488 /*
1489 * ioctl() and fcntl() are varargs calls and need special treatment.
1490 */
1491
1492 /*
1493 * Various [Linux] libc's have various signatures for ioctl so we
1494 * need to handle the discrepancies. On NetBSD, we use the
1495 * one with unsigned long cmd.
1496 */
1497 int
1498 #ifdef HAVE_IOCTL_CMD_INT
1499 ioctl(int fd, int cmd, ...)
1500 {
1501 int (*op_ioctl)(int, int cmd, ...);
1502 #else
1503 ioctl(int fd, unsigned long cmd, ...)
1504 {
1505 int (*op_ioctl)(int, unsigned long cmd, ...);
1506 #endif
1507 va_list ap;
1508 int rv;
1509
1510 DPRINTF(("ioctl -> %d (%s)\n", fd, whichfd(fd)));
1511 if (fd_isrump(fd)) {
1512 fd = fd_host2rump(fd);
1513 op_ioctl = GETSYSCALL(rump, IOCTL);
1514 } else {
1515 op_ioctl = GETSYSCALL(host, IOCTL);
1516 }
1517
1518 va_start(ap, cmd);
1519 rv = op_ioctl(fd, cmd, va_arg(ap, void *));
1520 va_end(ap);
1521 DPRINTF(("ioctl <- %d\n", rv));
1522 return rv;
1523 }
1524
1525 int
1526 fcntl(int fd, int cmd, ...)
1527 {
1528 int (*op_fcntl)(int, int, ...);
1529 va_list ap;
1530 int rv, minfd;
1531
1532 DPRINTF(("fcntl -> %d (cmd %d)\n", fd, cmd));
1533
1534 switch (cmd) {
1535 case F_DUPFD_CLOEXEC: /* Ignore CLOEXEC bit for now */
1536 case F_DUPFD:
1537 va_start(ap, cmd);
1538 minfd = va_arg(ap, int);
1539 va_end(ap);
1540 return dodup(fd, minfd);
1541
1542 #ifdef F_CLOSEM
1543 case F_CLOSEM: {
1544 int maxdup2, i;
1545
1546 /*
1547 * So, if fd < HIJACKOFF, we want to do a host closem.
1548 */
1549
1550 if (fd < hijack_fdoff) {
1551 int closemfd = fd;
1552
1553 if (rumpclient__closenotify(&closemfd,
1554 RUMPCLIENT_CLOSE_FCLOSEM) == -1)
1555 return -1;
1556 op_fcntl = GETSYSCALL(host, FCNTL);
1557 rv = op_fcntl(closemfd, cmd);
1558 if (rv)
1559 return rv;
1560 }
1561
1562 /*
1563 * Additionally, we want to do a rump closem, but only
1564 * for the file descriptors not dup2'd.
1565 */
1566
1567 for (i = 0, maxdup2 = -1; i <= DUP2HIGH; i++) {
1568 if (dup2vec[i] & DUP2BIT) {
1569 int val;
1570
1571 val = dup2vec[i] & DUP2FDMASK;
1572 maxdup2 = MAX(val, maxdup2);
1573 }
1574 }
1575
1576 if (fd >= hijack_fdoff)
1577 fd -= hijack_fdoff;
1578 else
1579 fd = 0;
1580 fd = MAX(maxdup2+1, fd);
1581
1582 /* hmm, maybe we should close rump fd's not within dup2mask? */
1583 return rump_sys_fcntl(fd, F_CLOSEM);
1584 }
1585 #endif /* F_CLOSEM */
1586
1587 #ifdef F_MAXFD
1588 case F_MAXFD:
1589 /*
1590 * For maxfd, if there's a rump kernel fd, return
1591 * it hostified. Otherwise, return host's MAXFD
1592 * return value.
1593 */
1594 if ((rv = rump_sys_fcntl(fd, F_MAXFD)) != -1) {
1595 /*
1596 * This might go a little wrong in case
1597 * of dup2 to [012], but I'm not sure if
1598 * there's a justification for tracking
1599 * that info. Consider e.g.
1600 * dup2(rumpfd, 2) followed by rump_sys_open()
1601 * returning 1. We should return 1+HIJACKOFF,
1602 * not 2+HIJACKOFF. However, if [01] is not
1603 * open, the correct return value is 2.
1604 */
1605 return fd_rump2host(fd);
1606 } else {
1607 op_fcntl = GETSYSCALL(host, FCNTL);
1608 return op_fcntl(fd, F_MAXFD);
1609 }
1610 /*NOTREACHED*/
1611 #endif /* F_MAXFD */
1612
1613 default:
1614 if (fd_isrump(fd)) {
1615 fd = fd_host2rump(fd);
1616 op_fcntl = GETSYSCALL(rump, FCNTL);
1617 } else {
1618 op_fcntl = GETSYSCALL(host, FCNTL);
1619 }
1620
1621 va_start(ap, cmd);
1622 rv = op_fcntl(fd, cmd, va_arg(ap, void *));
1623 va_end(ap);
1624 return rv;
1625 }
1626 /*NOTREACHED*/
1627 }
1628
1629 int
1630 flock(int fd, int operation)
1631 {
1632 int (*op_flock)(int, int);
1633
1634 DPRINTF(("flock -> %d (operation %d)\n", fd, operation));
1635
1636 if (fd_isrump(fd)) {
1637 fd = fd_host2rump(fd);
1638 op_flock = GETSYSCALL(rump, FLOCK);
1639 } else {
1640 op_flock = GETSYSCALL(host, FLOCK);
1641 }
1642
1643 return op_flock(fd, operation);
1644 }
1645
1646 int
1647 close(int fd)
1648 {
1649 int (*op_close)(int);
1650 int rv;
1651
1652 DPRINTF(("close -> %d\n", fd));
1653 if (fd_isrump(fd)) {
1654 bool undup2 = false;
1655 int ofd;
1656
1657 if (isdup2d(ofd = fd)) {
1658 undup2 = true;
1659 }
1660
1661 fd = fd_host2rump(fd);
1662 if (!undup2 && killdup2alias(fd)) {
1663 return 0;
1664 }
1665
1666 op_close = GETSYSCALL(rump, CLOSE);
1667 rv = op_close(fd);
1668 if (rv == 0 && undup2) {
1669 clrdup2(ofd);
1670 }
1671 } else {
1672 if (rumpclient__closenotify(&fd, RUMPCLIENT_CLOSE_CLOSE) == -1)
1673 return -1;
1674 op_close = GETSYSCALL(host, CLOSE);
1675 rv = op_close(fd);
1676 }
1677
1678 return rv;
1679 }
1680
1681 /*
1682 * write cannot issue a standard debug printf due to recursion
1683 */
1684 ssize_t
1685 write(int fd, const void *buf, size_t blen)
1686 {
1687 ssize_t (*op_write)(int, const void *, size_t);
1688
1689 if (fd_isrump(fd)) {
1690 fd = fd_host2rump(fd);
1691 op_write = GETSYSCALL(rump, WRITE);
1692 } else {
1693 op_write = GETSYSCALL(host, WRITE);
1694 }
1695
1696 return op_write(fd, buf, blen);
1697 }
1698
/*
 * file descriptor passing
 *
 * we intercept sendmsg and recvmsg to convert file descriptors in
 * control messages.  an attempt to send a descriptor from a different
 * kernel is rejected. (ENOTSUP)
 */

/*
 * Walk the control messages of msg and run func over every descriptor
 * carried in a SOL_SOCKET/SCM_RIGHTS message.
 *
 * msg:    message whose ancillary data is inspected
 * func:   maps a descriptor; a negative result means "not acceptable"
 * dryrun: when true only check, when false store func's result back
 *
 * Returns 0 on success, or ENOTSUP as soon as func rejects a
 * descriptor (descriptors before it have already been rewritten unless
 * dryrun is set).
 */
static int
_msg_convert_fds(struct msghdr *msg, int (*func)(int), bool dryrun)
{
	/* CMSG_LEN(0) is the aligned size of a bare control header */
	const size_t hdrlen = CMSG_LEN(0);
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
	    cmsg = CMSG_NXTHDR(msg, cmsg)) {
		size_t nfds, i;
		int *fdp;

		/*
		 * A header claiming to be shorter than a header cannot
		 * be trusted: the payload size below would wrap around.
		 * Stop walking and leave the message untouched.
		 */
		if (cmsg->cmsg_len < hdrlen)
			break;

		if (cmsg->cmsg_level != SOL_SOCKET ||
		    cmsg->cmsg_type != SCM_RIGHTS)
			continue;

		fdp = (void *)CMSG_DATA(cmsg);
		nfds = (cmsg->cmsg_len - hdrlen) / sizeof(int);

		for (i = 0; i < nfds; i++) {
			const int newval = func(fdp[i]);

			if (newval < 0) {
				return ENOTSUP;
			}
			if (!dryrun)
				fdp[i] = newval;
		}
	}
	return 0;
}
1736
/* Rewrite every passed descriptor in msg through func. */
static int
msg_convert_fds(struct msghdr *msg, int (*func)(int))
{
	const bool dryrun = false;

	return _msg_convert_fds(msg, func, dryrun);
}
1743
/* Run func over every passed descriptor in msg without modifying it. */
static int
msg_check_fds(struct msghdr *msg, int (*func)(int))
{
	const bool dryrun = true;

	return _msg_convert_fds(msg, func, dryrun);
}
1750
1751 ssize_t
1752 recvmsg(int fd, struct msghdr *msg, int flags)
1753 {
1754 ssize_t (*op_recvmsg)(int, struct msghdr *, int);
1755 ssize_t ret;
1756 const bool isrump = fd_isrump(fd);
1757
1758 DPRINTF(("%s -> %d (%s)\n", __func__, fd, whichfd(fd)));
1759 if (isrump) {
1760 fd = fd_host2rump(fd);
1761 op_recvmsg = GETSYSCALL(rump, RECVMSG);
1762 } else {
1763 op_recvmsg = GETSYSCALL(host, RECVMSG);
1764 }
1765 ret = op_recvmsg(fd, msg, flags);
1766 if (ret == -1) {
1767 return ret;
1768 }
1769 /*
1770 * convert descriptors in the message.
1771 */
1772 if (isrump) {
1773 msg_convert_fds(msg, fd_rump2host);
1774 } else {
1775 msg_convert_fds(msg, fd_host2host);
1776 }
1777 return ret;
1778 }
1779
/* recv(2) is recvfrom(2) without a source address. */
ssize_t
recv(int fd, void *buf, size_t len, int flags)
{
	struct sockaddr *nofrom = NULL;
	socklen_t *nofromlen = NULL;

	return recvfrom(fd, buf, len, flags, nofrom, nofromlen);
}
1786
/* send(2) is sendto(2) without a destination address. */
ssize_t
send(int fd, const void *buf, size_t len, int flags)
{
	const struct sockaddr *noto = NULL;

	return sendto(fd, buf, len, flags, noto, 0);
}
1793
/* Descriptor check for _msg_convert_fds(): 0 if fd is a rump fd, else -1. */
static int
fd_check_rump(int fd)
{
	if (fd_isrump(fd))
		return 0;
	return -1;
}
1800
/* Descriptor check for _msg_convert_fds(): 0 if fd is a host fd, else -1. */
static int
fd_check_host(int fd)
{
	if (fd_isrump(fd))
		return -1;
	return 0;
}
1807
1808 ssize_t
1809 sendmsg(int fd, const struct msghdr *msg, int flags)
1810 {
1811 ssize_t (*op_sendmsg)(int, const struct msghdr *, int);
1812 const bool isrump = fd_isrump(fd);
1813 int error;
1814
1815 DPRINTF(("%s -> %d (%s)\n", __func__, fd, whichfd(fd)));
1816 /*
1817 * reject descriptors from a different kernel.
1818 */
1819 error = msg_check_fds(__UNCONST(msg),
1820 isrump ? fd_check_rump: fd_check_host);
1821 if (error != 0) {
1822 errno = error;
1823 return -1;
1824 }
1825 /*
1826 * convert descriptors in the message to raw values.
1827 */
1828 if (isrump) {
1829 fd = fd_host2rump(fd);
1830 /*
1831 * XXX we directly modify the given message assuming:
1832 * - cmsg is writable (typically on caller's stack)
1833 * - caller don't care cmsg's contents after calling sendmsg.
1834 * (thus no need to restore values)
1835 *
1836 * it's safer to copy and modify instead.
1837 */
1838 msg_convert_fds(__UNCONST(msg), fd_host2rump);
1839 op_sendmsg = GETSYSCALL(rump, SENDMSG);
1840 } else {
1841 op_sendmsg = GETSYSCALL(host, SENDMSG);
1842 }
1843 return op_sendmsg(fd, msg, flags);
1844 }
1845
1846 /*
1847 * dup2 is special. we allow dup2 of a rump kernel fd to 0-2 since
1848 * many programs do that. dup2 of a rump kernel fd to another value
1849 * not >= fdoff is an error.
1850 *
1851 * Note: cannot rump2host newd, because it is often hardcoded.
1852 */
1853 int
1854 dup2(int oldd, int newd)
1855 {
1856 int (*host_dup2)(int, int);
1857 int rv;
1858
1859 DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
1860
1861 if (fd_isrump(oldd)) {
1862 int (*op_close)(int) = GETSYSCALL(host, CLOSE);
1863
1864 /* only allow fd 0-2 for cross-kernel dup */
1865 if (!(newd >= 0 && newd <= 2 && !fd_isrump(newd))) {
1866 errno = EBADF;
1867 return -1;
1868 }
1869
1870 /* regular dup2? */
1871 if (fd_isrump(newd)) {
1872 newd = fd_host2rump(newd);
1873 rv = rump_sys_dup2(oldd, newd);
1874 return fd_rump2host(rv);
1875 }
1876
1877 /*
1878 * dup2 rump => host? just establish an
1879 * entry in the mapping table.
1880 */
1881 op_close(newd);
1882 setdup2(newd, fd_host2rump(oldd));
1883 rv = 0;
1884 } else {
1885 host_dup2 = syscalls[DUALCALL_DUP2].bs_host;
1886 if (rumpclient__closenotify(&newd, RUMPCLIENT_CLOSE_DUP2) == -1)
1887 return -1;
1888 rv = host_dup2(oldd, newd);
1889 }
1890
1891 return rv;
1892 }
1893
/* dup(2) interposer: dodup() with no lower bound on the new descriptor. */
int
dup(int oldd)
{
	const int lowest = 0;

	return dodup(oldd, lowest);
}
1900
1901 pid_t
1902 fork(void)
1903 {
1904 pid_t rv;
1905
1906 DPRINTF(("fork\n"));
1907
1908 rv = rumpclient__dofork(host_fork);
1909
1910 DPRINTF(("fork returns %d\n", rv));
1911 return rv;
1912 }
1913 #ifdef VFORK
1914 /* we do not have the luxury of not requiring a stackframe */
1915 #define __strong_alias_macro(m, f) __strong_alias(m, f)
1916 __strong_alias_macro(VFORK,fork)
1917 #endif
1918
/*
 * daemon(3) interposer.  The rump client state is prepared with
 * rumpclient_prefork() before the host daemon() call and
 * re-established with rumpclient_fork_init() afterwards.
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
int
daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (host_daemon(nochdir, noclose) == -1) {
		/*
		 * No fork happened: hand the prefork context back
		 * instead of dropping it, and keep daemon()'s errno.
		 */
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	if (rumpclient_fork_init(rf) == -1)
		return -1;

	return 0;
}
1935
1936 int
1937 execve(const char *path, char *const argv[], char *const envp[])
1938 {
1939 char buf[128];
1940 char *dup2str;
1941 const char *pwdinrumpstr;
1942 char **newenv;
1943 size_t nelem;
1944 int rv, sverrno;
1945 int bonus = 2, i = 0;
1946
1947 snprintf(buf, sizeof(buf), "RUMPHIJACK__DUP2INFO=%u,%u,%u",
1948 dup2vec[0], dup2vec[1], dup2vec[2]);
1949 dup2str = strdup(buf);
1950 if (dup2str == NULL) {
1951 errno = ENOMEM;
1952 return -1;
1953 }
1954
1955 if (pwdinrump) {
1956 pwdinrumpstr = "RUMPHIJACK__PWDINRUMP=true";
1957 bonus++;
1958 } else {
1959 pwdinrumpstr = NULL;
1960 }
1961
1962 for (nelem = 0; envp && envp[nelem]; nelem++)
1963 continue;
1964 newenv = malloc(sizeof(*newenv) * (nelem+bonus));
1965 if (newenv == NULL) {
1966 free(dup2str);
1967 errno = ENOMEM;
1968 return -1;
1969 }
1970 memcpy(newenv, envp, nelem*sizeof(*newenv));
1971 newenv[nelem+i] = dup2str;
1972 i++;
1973
1974 if (pwdinrumpstr) {
1975 newenv[nelem+i] = __UNCONST(pwdinrumpstr);
1976 i++;
1977 }
1978 newenv[nelem+i] = NULL;
1979 _DIAGASSERT(i < bonus);
1980
1981 rv = rumpclient_exec(path, argv, newenv);
1982
1983 _DIAGASSERT(rv != 0);
1984 sverrno = errno;
1985 free(newenv);
1986 free(dup2str);
1987 errno = sverrno;
1988 return rv;
1989 }
1990
/*
 * select is done by calling poll.
 *
 * The three descriptor sets are folded into one pollfd vector, the
 * poll interposer is called, and the results are unfolded back into
 * the sets.  The return value is that of the poll call.
 */
int
REALPSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	const struct timespec *timeout, const sigset_t *sigmask)
{
	struct pollfd *pfds = NULL;
	nfds_t realnfds = 0;
	int fd, slot;
	int rv;

	DPRINTF(("pselect %d %p %p %p %p %p\n", nfds,
	    readfds, writefds, exceptfds, timeout, sigmask));

	/*
	 * First count how many descriptors are really of interest.
	 * This is because up to and including nb5 poll() silently
	 * refuses nfds > process_maxopen_fds.
	 */
	for (fd = 0; fd < nfds; fd++) {
		if ((readfds && FD_ISSET(fd, readfds)) ||
		    (writefds && FD_ISSET(fd, writefds)) ||
		    (exceptfds && FD_ISSET(fd, exceptfds)))
			realnfds++;
	}

	if (realnfds) {
		pfds = calloc(realnfds, sizeof(*pfds));
		if (!pfds)
			return -1;
	}

	/* one pollfd per descriptor, events merged from all three sets */
	slot = 0;
	for (fd = 0; fd < nfds; fd++) {
		short events = 0;

		if (readfds && FD_ISSET(fd, readfds))
			events |= POLLIN;
		if (writefds && FD_ISSET(fd, writefds))
			events |= POLLOUT;
		if (exceptfds && FD_ISSET(fd, exceptfds))
			events |= POLLHUP|POLLERR;
		if (events == 0)
			continue;

		pfds[slot].fd = fd;
		pfds[slot].events = events;
		slot++;
	}
	assert(slot == (int)realnfds);

	rv = REALPOLLTS(pfds, realnfds, timeout, sigmask);

	/*
	 * "If select() returns with an error the descriptor sets
	 * will be unmodified"
	 */
	if (rv >= 0) {
		/*
		 * zero out results (can't use FD_ZERO for the
		 * obvious select-me-not reason).
		 *
		 * This is done even on timeout since some software
		 * ignores the return value of select and would
		 * otherwise assume all input descriptors have activity.
		 */
		for (fd = 0; fd < nfds; fd++) {
			if (readfds)
				FD_CLR(fd, readfds);
			if (writefds)
				FD_CLR(fd, writefds);
			if (exceptfds)
				FD_CLR(fd, exceptfds);
		}

		if (rv > 0) {
			/* >0 fds with activity: harvest the results */
			for (slot = 0; slot < (int)realnfds; slot++) {
				const struct pollfd *p = &pfds[slot];

				if (readfds && (p->revents & POLLIN))
					FD_SET(p->fd, readfds);
				if (writefds && (p->revents & POLLOUT))
					FD_SET(p->fd, writefds);
				if (exceptfds &&
				    (p->revents & (POLLHUP|POLLERR)))
					FD_SET(p->fd, exceptfds);
			}
		}
	}

	free(pfds);
	return rv;
}
2111
/*
 * select(2) interposer: convert the timeval timeout (if any) to a
 * timespec and hand over to the pselect implementation with no signal
 * mask.
 */
int
REALSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
	struct timespec ts;

	if (timeout == NULL)
		return REALPSELECT(nfds, readfds, writefds, exceptfds,
		    NULL, NULL);

	TIMEVAL_TO_TIMESPEC(timeout, &ts);
	return REALPSELECT(nfds, readfds, writefds, exceptfds, &ts, NULL);
}
2123
2124
/*
 * Count how many entries of fds refer to each kernel.  Entries with
 * fd == -1 are ignored.  The counters are incremented, not reset, so
 * the caller initializes them.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	nfds_t idx = 0;

	while (idx < nfds) {
		const int fd = fds[idx++].fd;

		if (fd == -1)
			continue;

		if (!fd_isrump(fd))
			(*hostcall)++;
		else
			(*rumpcall)++;
	}
}
2140
/* Replace the fd of each of the nfds entries in fds with fdadj(fd). */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	nfds_t idx = 0;

	while (idx < nfds) {
		const int mapped = fdadj(fds[idx].fd);

		fds[idx].fd = mapped;
		idx++;
	}
}
2150
/*
 * poll is easy as long as the fds in the call come from only one
 * kernel.  otherwise it's quite tricky...
 *
 * struct pollarg carries the arguments for the host-side poll that
 * hostpoll() runs in a helper thread.
 */
struct pollarg {
	struct pollfd *pfds;		/* vector passed to the host pollts */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout passed to the host pollts */
	const sigset_t *sigmask;	/* signal mask passed to the host pollts */
	int pipefd;			/* rump fd hostpoll() writes its result to */
	int errnum;			/* errno saved by hostpoll() when pollts fails */
};
2163
2164 static void *
2165 hostpoll(void *arg)
2166 {
2167 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
2168 const sigset_t *);
2169 struct pollarg *parg = arg;
2170 intptr_t rv;
2171
2172 op_pollts = GETSYSCALL(host, POLLTS);
2173 rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
2174 if (rv == -1)
2175 parg->errnum = errno;
2176 rump_sys_write(parg->pipefd, &rv, sizeof(rv));
2177
2178 return (void *)rv;
2179 }
2180
2181 int
2182 REALPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
2183 const sigset_t *sigmask)
2184 {
2185 int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
2186 const sigset_t *);
2187 int (*host_close)(int);
2188 int hostcall = 0, rumpcall = 0;
2189 pthread_t pt;
2190 nfds_t i;
2191 int rv;
2192
2193 DPRINTF(("poll %p %d %p %p\n", fds, (int)nfds, ts, sigmask));
2194 checkpoll(fds, nfds, &hostcall, &rumpcall);
2195
2196 if (hostcall && rumpcall) {
2197 struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
2198 int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
2199 struct pollarg parg;
2200 void *trv_val;
2201 int sverrno = 0, rv_rump, rv_host, errno_rump, errno_host;
2202
2203 /*
2204 * ok, this is where it gets tricky. We must support
2205 * this since it's a very common operation in certain
2206 * types of software (telnet, netcat, etc). We allocate
2207 * two vectors and run two poll commands in separate
2208 * threads. Whichever returns first "wins" and the
2209 * other kernel's fds won't show activity.
2210 */
2211 rv = -1;
2212
2213 /* allocate full vector for O(n) joining after call */
2214 pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
2215 if (!pfd_host)
2216 goto out;
2217 pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
2218 if (!pfd_rump) {
2219 goto out;
2220 }
2221
2222 /*
2223 * then, open two pipes, one for notifications
2224 * to each kernel.
2225 *
2226 * At least the rump pipe should probably be
2227 * cached, along with the helper threads. This
2228 * should give a microbenchmark improvement (haven't
2229 * experienced a macro-level problem yet, though).
2230 */
2231 if ((rv = rump_sys_pipe(rpipe)) == -1) {
2232 sverrno = errno;
2233 }
2234 if (rv == 0 && (rv = pipe(hpipe)) == -1) {
2235 sverrno = errno;
2236 }
2237
2238 /* split vectors (or signal errors) */
2239 for (i = 0; i < nfds; i++) {
2240 int fd;
2241
2242 fds[i].revents = 0;
2243 if (fds[i].fd == -1) {
2244 pfd_host[i].fd = -1;
2245 pfd_rump[i].fd = -1;
2246 } else if (fd_isrump(fds[i].fd)) {
2247 pfd_host[i].fd = -1;
2248 fd = fd_host2rump(fds[i].fd);
2249 if (fd == rpipe[0] || fd == rpipe[1]) {
2250 fds[i].revents = POLLNVAL;
2251 if (rv != -1)
2252 rv++;
2253 }
2254 pfd_rump[i].fd = fd;
2255 pfd_rump[i].events = fds[i].events;
2256 } else {
2257 pfd_rump[i].fd = -1;
2258 fd = fds[i].fd;
2259 if (fd == hpipe[0] || fd == hpipe[1]) {
2260 fds[i].revents = POLLNVAL;
2261 if (rv != -1)
2262 rv++;
2263 }
2264 pfd_host[i].fd = fd;
2265 pfd_host[i].events = fds[i].events;
2266 }
2267 pfd_rump[i].revents = pfd_host[i].revents = 0;
2268 }
2269 if (rv) {
2270 goto out;
2271 }
2272
2273 pfd_host[nfds].fd = hpipe[0];
2274 pfd_host[nfds].events = POLLIN;
2275 pfd_rump[nfds].fd = rpipe[0];
2276 pfd_rump[nfds].events = POLLIN;
2277
2278 /*
2279 * then, create a thread to do host part and meanwhile
2280 * do rump kernel part right here
2281 */
2282
2283 parg.pfds = pfd_host;
2284 parg.nfds = nfds+1;
2285 parg.ts = ts;
2286 parg.sigmask = sigmask;
2287 parg.pipefd = rpipe[1];
2288 pthread_create(&pt, NULL, hostpoll, &parg);
2289
2290 op_pollts = GETSYSCALL(rump, POLLTS);
2291 rv_rump = op_pollts(pfd_rump, nfds+1, ts, NULL);
2292 errno_rump = errno;
2293 write(hpipe[1], &rv, sizeof(rv));
2294 pthread_join(pt, &trv_val);
2295 rv_host = (int)(intptr_t)trv_val;
2296 errno_host = parg.errnum;
2297
2298 /* strip cross-thread notification from real results */
2299 if (rv_host > 0 && pfd_host[nfds].revents & POLLIN) {
2300 rv_host--;
2301 }
2302 if (rv_rump > 0 && pfd_rump[nfds].revents & POLLIN) {
2303 rv_rump--;
2304 }
2305
2306 /* then merge the results into what's reported to the caller */
2307 if (rv_rump > 0 || rv_host > 0) {
2308 /* SUCCESS */
2309
2310 rv = 0;
2311 if (rv_rump > 0) {
2312 for (i = 0; i < nfds; i++) {
2313 if (pfd_rump[i].fd != -1)
2314 fds[i].revents
2315 = pfd_rump[i].revents;
2316 }
2317 rv += rv_rump;
2318 }
2319 if (rv_host > 0) {
2320 for (i = 0; i < nfds; i++) {
2321 if (pfd_host[i].fd != -1)
2322 fds[i].revents
2323 = pfd_host[i].revents;
2324 }
2325 rv += rv_host;
2326 }
2327 assert(rv > 0);
2328 sverrno = 0;
2329 } else if (rv_rump == -1 || rv_host == -1) {
2330 /* ERROR */
2331
2332 /* just pick one kernel at "random" */
2333 rv = -1;
2334 if (rv_host == -1) {
2335 sverrno = errno_host;
2336 } else if (rv_rump == -1) {
2337 sverrno = errno_rump;
2338 }
2339 } else {
2340 /* TIMEOUT */
2341
2342 rv = 0;
2343 assert(rv_rump == 0 && rv_host == 0);
2344 }
2345
2346 out:
2347 host_close = GETSYSCALL(host, CLOSE);
2348 if (rpipe[0] != -1)
2349 rump_sys_close(rpipe[0]);
2350 if (rpipe[1] != -1)
2351 rump_sys_close(rpipe[1]);
2352 if (hpipe[0] != -1)
2353 host_close(hpipe[0]);
2354 if (hpipe[1] != -1)
2355 host_close(hpipe[1]);
2356 free(pfd_host);
2357 free(pfd_rump);
2358 errno = sverrno;
2359 } else {
2360 if (hostcall) {
2361 op_pollts = GETSYSCALL(host, POLLTS);
2362 } else {
2363 op_pollts = GETSYSCALL(rump, POLLTS);
2364 adjustpoll(fds, nfds, fd_host2rump);
2365 }
2366
2367 rv = op_pollts(fds, nfds, ts, sigmask);
2368 if (rumpcall)
2369 adjustpoll(fds, nfds, fd_rump2host_withdup);
2370 }
2371
2372 return rv;
2373 }
2374
2375 int
2376 poll(struct pollfd *fds, nfds_t nfds, int timeout)
2377 {
2378 struct timespec ts;
2379 struct timespec *tsp = NULL;
2380
2381 if (timeout != INFTIM) {
2382 ts.tv_sec = timeout / 1000;
2383 ts.tv_nsec = (timeout % 1000) * 1000*1000;
2384
2385 tsp = &ts;
2386 }
2387
2388 return REALPOLLTS(fds, nfds, tsp, NULL);
2389 }
2390
#ifdef HAVE_KQUEUE
/*
 * kevent(2) interposer.  Only host descriptors are supported: a change
 * with a READ, WRITE or VNODE filter on a rump descriptor fails the
 * whole call with ENOTSUP.  Everything else goes to the host.
 */
int
REALKEVENT(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{
	int (*op_kevent)(int, const struct kevent *, size_t,
		struct kevent *, size_t, const struct timespec *);
	size_t idx;

	/*
	 * Check that we don't attempt to kevent rump kernel fd's.
	 * That needs similar treatment to select/poll, but is slightly
	 * trickier since we need to manage two different kq descriptors.
	 * (TODO, in case you're wondering).
	 */
	for (idx = 0; idx < nchanges; idx++) {
		const struct kevent *ev = &changelist[idx];
		const bool fdfilter = ev->filter == EVFILT_READ ||
		    ev->filter == EVFILT_WRITE ||
		    ev->filter == EVFILT_VNODE;

		if (!fdfilter)
			continue;
		if (fd_isrump((int)ev->ident)) {
			errno = ENOTSUP;
			return -1;
		}
	}

	op_kevent = GETSYSCALL(host, KEVENT);
	return op_kevent(kq, changelist, nchanges, eventlist, nevents, timeout);
}
#endif /* HAVE_KQUEUE */
2423
2424 /*
2425 * mmapping from a rump kernel is not supported, so disallow it.
2426 */
2427 void *
2428 mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset)
2429 {
2430
2431 if (flags & MAP_FILE && fd_isrump(fd)) {
2432 errno = ENOSYS;
2433 return MAP_FAILED;
2434 }
2435 if (__predict_false(host_mmap == NULL)) {
2436 host_mmap = rumphijack_dlsym(RTLD_NEXT, "mmap");
2437 }
2438 return host_mmap(addr, len, prot, flags, fd, offset);
2439 }
2440
2441 #ifdef __NetBSD__
2442 /*
2443 * these go to one or the other on a per-process configuration
2444 */
2445 int __sysctl(const int *, unsigned int, void *, size_t *, const void *, size_t);
2446 int
2447 __sysctl(const int *name, unsigned int namelen, void *old, size_t *oldlenp,
2448 const void *new, size_t newlen)
2449 {
2450 int (*op___sysctl)(const int *, unsigned int, void *, size_t *,
2451 const void *, size_t);
2452
2453 if (rumpsysctl) {
2454 op___sysctl = GETSYSCALL(rump, __SYSCTL);
2455 } else {
2456 op___sysctl = GETSYSCALL(host, __SYSCTL);
2457 /* we haven't inited yet */
2458 if (__predict_false(op___sysctl == NULL)) {
2459 op___sysctl = rumphijack_dlsym(RTLD_NEXT, "__sysctl");
2460 }
2461 }
2462
2463 return op___sysctl(name, namelen, old, oldlenp, new, newlen);
2464 }
2465 int modctl(int, void *);
2466 int
2467 modctl(int operation, void *argp)
2468 {
2469 int (*op_modctl)(int operation, void *argp);
2470
2471 if (rumpmodctl) {
2472 op_modctl = GETSYSCALL(rump, MODCTL);
2473 } else {
2474 op_modctl = GETSYSCALL(host, MODCTL);
2475 }
2476
2477 return op_modctl(operation, argp);
2478 }
2479 #endif
2480
2481 /*
2482 * Rest are std type calls.
2483 */
2484
/*
 * The invocations below each define one interposed call from a return
 * type, a name, a DUALCALL_* index, the full parameter list, the
 * parameter types, and the argument names.
 * NOTE(review): ATCALL and FDCALL are defined outside this part of the
 * file; presumably they select the kernel from the fd argument --
 * confirm against the macro definitions.
 */

#ifdef HAVE_UTIMENSAT
ATCALL(int, utimensat, DUALCALL_UTIMENSAT,				\
	(int fd, const char *path, const struct timespec t[2], int f),	\
	(int, const char *, const struct timespec [2], int),
	(fd, path, t, f))
#endif

/* socket address management */
FDCALL(int, bind, DUALCALL_BIND,					\
	(int fd, const struct sockaddr *name, socklen_t namelen),	\
	(int, const struct sockaddr *, socklen_t),			\
	(fd, name, namelen))

FDCALL(int, connect, DUALCALL_CONNECT,					\
	(int fd, const struct sockaddr *name, socklen_t namelen),	\
	(int, const struct sockaddr *, socklen_t),			\
	(fd, name, namelen))

FDCALL(int, getpeername, DUALCALL_GETPEERNAME,				\
	(int fd, struct sockaddr *name, socklen_t *namelen),		\
	(int, struct sockaddr *, socklen_t *),				\
	(fd, name, namelen))

FDCALL(int, getsockname, DUALCALL_GETSOCKNAME,				\
	(int fd, struct sockaddr *name, socklen_t *namelen),		\
	(int, struct sockaddr *, socklen_t *),				\
	(fd, name, namelen))

FDCALL(int, listen, DUALCALL_LISTEN,					\
	(int fd, int backlog),						\
	(int, int),							\
	(fd, backlog))

/* data transfer; recv() and send() in this file are built on these two */
FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM,				\
	(int fd, void *buf, size_t len, int flags,			\
	    struct sockaddr *from, socklen_t *fromlen),			\
	(int, void *, size_t, int, struct sockaddr *, socklen_t *),	\
	(fd, buf, len, flags, from, fromlen))

FDCALL(ssize_t, sendto, DUALCALL_SENDTO,				\
	(int fd, const void *buf, size_t len, int flags,		\
	    const struct sockaddr *to, socklen_t tolen),		\
	(int, const void *, size_t, int,				\
	    const struct sockaddr *, socklen_t),			\
	(fd, buf, len, flags, to, tolen))

/* socket options */
FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT,				\
	(int fd, int level, int optn, void *optval, socklen_t *optlen),	\
	(int, int, int, void *, socklen_t *),				\
	(fd, level, optn, optval, optlen))

FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT,				\
	(int fd, int level, int optn,					\
	    const void *optval, socklen_t optlen),			\
	(int, int, int, const void *, socklen_t),			\
	(fd, level, optn, optval, optlen))
2540
2541 FDCALL(int, shutdown, DUALCALL_SHUTDOWN, \
2542 (int fd, int how), \
2543 (int, int), \
2544 (fd, how))
2545
2546 FDCALL(ssize_t, REALREAD, DUALCALL_READ, \
2547 (int fd, void *buf, size_t buflen), \
2548 (int, void *, size_t), \
2549 (fd, buf, buflen))
2550
2551 #ifdef __linux__
2552 ssize_t __read_chk(int, void *, size_t)
2553 __attribute__((alias("read")));
2554 #endif
2555
2556 FDCALL(ssize_t, readv, DUALCALL_READV, \
2557 (int fd, const struct iovec *iov, int iovcnt), \
2558 (int, const struct iovec *, int), \
2559 (fd, iov, iovcnt))
2560
2561 FDCALL(ssize_t, REALPREAD, DUALCALL_PREAD, \
2562 (int fd, void *buf, size_t nbytes, off_t offset), \
2563 (int, void *, size_t, off_t), \
2564 (fd, buf, nbytes, offset))
2565
2566 FDCALL(ssize_t, preadv, DUALCALL_PREADV, \
2567 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \
2568 (int, const struct iovec *, int, off_t), \
2569 (fd, iov, iovcnt, offset))
2570
2571 FDCALL(ssize_t, writev, DUALCALL_WRITEV, \
2572 (int fd, const struct iovec *iov, int iovcnt), \
2573 (int, const struct iovec *, int), \
2574 (fd, iov, iovcnt))
2575
2576 FDCALL(ssize_t, REALPWRITE, DUALCALL_PWRITE, \
2577 (int fd, const void *buf, size_t nbytes, off_t offset), \
2578 (int, const void *, size_t, off_t), \
2579 (fd, buf, nbytes, offset))
2580
2581 FDCALL(ssize_t, pwritev, DUALCALL_PWRITEV, \
2582 (int fd, const struct iovec *iov, int iovcnt, off_t offset), \
2583 (int, const struct iovec *, int, off_t), \
2584 (fd, iov, iovcnt, offset))
2585
2586 #ifndef __linux__
2587 FDCALL(int, REALFSTAT, DUALCALL_FSTAT, \
2588 (int fd, struct stat *sb), \
2589 (int, struct stat *), \
2590 (fd, sb))
2591 #endif
2592
2593 #ifdef __NetBSD__
2594 FDCALL(int, REALFSTATVFS1, DUALCALL_FSTATVFS1, \
2595 (int fd, struct statvfs *buf, int flags), \
2596 (int, struct statvfs *, int), \
2597 (fd, buf, flags))
2598 #endif
2599
2600 FDCALL(off_t, lseek, DUALCALL_LSEEK, \
2601 (int fd, off_t offset, int whence), \
2602 (int, off_t, int), \
2603 (fd, offset, whence))
2604 #ifdef LSEEK_ALIAS
2605 __strong_alias(LSEEK_ALIAS,lseek)
2606 #endif
2607
2608 #ifndef __linux__
2609 FDCALL(int, REALGETDENTS, DUALCALL_GETDENTS, \
2610 (int fd, char *buf, size_t nbytes), \
2611 (int, char *, size_t), \
2612 (fd, buf, nbytes))
2613 #endif
2614
2615 FDCALL(int, fchown, DUALCALL_FCHOWN, \
2616 (int fd, uid_t owner, gid_t group), \
2617 (int, uid_t, gid_t), \
2618 (fd, owner, group))
2619
2620 FDCALL(int, fchmod, DUALCALL_FCHMOD, \
2621 (int fd, mode_t mode), \
2622 (int, mode_t), \
2623 (fd, mode))
2624
2625 FDCALL(int, ftruncate, DUALCALL_FTRUNCATE, \
2626 (int fd, off_t length), \
2627 (int, off_t), \
2628 (fd, length))
2629
2630 FDCALL(int, fsync, DUALCALL_FSYNC, \
2631 (int fd), \
2632 (int), \
2633 (fd))
2634
2635 #ifdef HAVE_FSYNC_RANGE
2636 FDCALL(int, fsync_range, DUALCALL_FSYNC_RANGE, \
2637 (int fd, int how, off_t start, off_t length), \
2638 (int, int, off_t, off_t), \
2639 (fd, how, start, length))
2640 #endif
2641
2642 FDCALL(int, futimes, DUALCALL_FUTIMES, \
2643 (int fd, const struct timeval *tv), \
2644 (int, const struct timeval *), \
2645 (fd, tv))
2646
2647 FDCALL(int, futimens, DUALCALL_FUTIMENS, \
2648 (int fd, const struct timespec *ts), \
2649 (int, const struct timespec *), \
2650 (fd, ts))
2651
2652 #ifdef HAVE_CHFLAGS
2653 FDCALL(int, fchflags, DUALCALL_FCHFLAGS, \
2654 (int fd, u_long flags), \
2655 (int, u_long), \
2656 (fd, flags))
2657 #endif
2658
2659 /*
2660 * path-based selectors
2661 */
2662
2663 #ifndef __linux__
2664 PATHCALL(int, REALSTAT, DUALCALL_STAT, \
2665 (const char *path, struct stat *sb), \
2666 (const char *, struct stat *), \
2667 (path, sb))
2668
2669 PATHCALL(int, REALLSTAT, DUALCALL_LSTAT, \
2670 (const char *path, struct stat *sb), \
2671 (const char *, struct stat *), \
2672 (path, sb))
2673 #endif
2674
2675 PATHCALL(int, chown, DUALCALL_CHOWN, \
2676 (const char *path, uid_t owner, gid_t group), \
2677 (const char *, uid_t, gid_t), \
2678 (path, owner, group))
2679
2680 PATHCALL(int, lchown, DUALCALL_LCHOWN, \
2681 (const char *path, uid_t owner, gid_t group), \
2682 (const char *, uid_t, gid_t), \
2683 (path, owner, group))
2684
2685 PATHCALL(int, chmod, DUALCALL_CHMOD, \
2686 (const char *path, mode_t mode), \
2687 (const char *, mode_t), \
2688 (path, mode))
2689
2690 PATHCALL(int, lchmod, DUALCALL_LCHMOD, \
2691 (const char *path, mode_t mode), \
2692 (const char *, mode_t), \
2693 (path, mode))
2694
2695 #ifdef __NetBSD__
2696 PATHCALL(int, REALSTATVFS1, DUALCALL_STATVFS1, \
2697 (const char *path, struct statvfs *buf, int flags), \
2698 (const char *, struct statvfs *, int), \
2699 (path, buf, flags))
2700 #endif
2701
2702 PATHCALL(int, unlink, DUALCALL_UNLINK, \
2703 (const char *path), \
2704 (const char *), \
2705 (path))
2706
2707 PATHCALL(int, symlink, DUALCALL_SYMLINK, \
2708 (const char *target, const char *path), \
2709 (const char *, const char *), \
2710 (target, path))
2711
2712 /*
2713 * readlink() can be called from malloc which can be called
2714 * from dlsym() during init
2715 */
2716 ssize_t
2717 readlink(const char *path, char *buf, size_t bufsiz)
2718 {
2719 int (*op_readlink)(const char *, char *, size_t);
2720 enum pathtype pt;
2721
2722 if ((pt = path_isrump(path)) != PATH_HOST) {
2723 op_readlink = GETSYSCALL(rump, READLINK);
2724 if (pt == PATH_RUMP)
2725 path = path_host2rump(path);
2726 } else {
2727 op_readlink = GETSYSCALL(host, READLINK);
2728 }
2729
2730 if (__predict_false(op_readlink == NULL)) {
2731 errno = ENOENT;
2732 return -1;
2733 }
2734
2735 return op_readlink(path, buf, bufsiz);
2736 }
2737
2738 PATHCALL(int, mkdir, DUALCALL_MKDIR, \
2739 (const char *path, mode_t mode), \
2740 (const char *, mode_t), \
2741 (path, mode))
2742
2743 PATHCALL(int, rmdir, DUALCALL_RMDIR, \
2744 (const char *path), \
2745 (const char *), \
2746 (path))
2747
2748 PATHCALL(int, utimes, DUALCALL_UTIMES, \
2749 (const char *path, const struct timeval *tv), \
2750 (const char *, const struct timeval *), \
2751 (path, tv))
2752
2753 PATHCALL(int, lutimes, DUALCALL_LUTIMES, \
2754 (const char *path, const struct timeval *tv), \
2755 (const char *, const struct timeval *), \
2756 (path, tv))
2757
2758 #ifdef HAVE_CHFLAGS
2759 PATHCALL(int, chflags, DUALCALL_CHFLAGS, \
2760 (const char *path, u_long flags), \
2761 (const char *, u_long), \
2762 (path, flags))
2763
2764 PATHCALL(int, lchflags, DUALCALL_LCHFLAGS, \
2765 (const char *path, u_long flags), \
2766 (const char *, u_long), \
2767 (path, flags))
2768 #endif /* HAVE_CHFLAGS */
2769
2770 PATHCALL(int, truncate, DUALCALL_TRUNCATE, \
2771 (const char *path, off_t length), \
2772 (const char *, off_t), \
2773 (path, length))
2774
2775 PATHCALL(int, access, DUALCALL_ACCESS, \
2776 (const char *path, int mode), \
2777 (const char *, int), \
2778 (path, mode))
2779
2780 #ifndef __linux__
2781 PATHCALL(int, REALMKNOD, DUALCALL_MKNOD, \
2782 (const char *path, mode_t mode, dev_t dev), \
2783 (const char *, mode_t, dev_t), \
2784 (path, mode, dev))
2785 #endif
2786
2787 /*
2788 * Note: with mount the decisive parameter is the mount
2789 * destination directory. This is because we don't really know
2790 * about the "source" directory in a generic call (and besides,
2791 * it might not even exist, cf. nfs).
2792 */
2793 #ifdef __NetBSD__
2794 PATHCALL(int, REALMOUNT, DUALCALL_MOUNT, \
2795 (const char *type, const char *path, int flags, \
2796 void *data, size_t dlen), \
2797 (const char *, const char *, int, void *, size_t), \
2798 (type, path, flags, data, dlen))
2799
2800 PATHCALL(int, unmount, DUALCALL_UNMOUNT, \
2801 (const char *path, int flags), \
2802 (const char *, int), \
2803 (path, flags))
2804 #endif /* __NetBSD__ */
2805
2806 #ifdef HAVE___QUOTACTL
2807 PATHCALL(int, __quotactl, DUALCALL_QUOTACTL, \
2808 (const char *path, struct quotactl_args *args), \
2809 (const char *, struct quotactl_args *), \
2810 (path, args))
2811 #endif /* HAVE___QUOTACTL */
2812
2813 #ifdef __NetBSD__
2814 PATHCALL(int, REALGETFH, DUALCALL_GETFH, \
2815 (const char *path, void *fhp, size_t *fh_size), \
2816 (const char *, void *, size_t *), \
2817 (path, fhp, fh_size))
2818 #endif
2819
/*
 * These act differently depending on the per-process vfs configuration.
 */
2823
2824 #ifdef __NetBSD__
2825 VFSCALL(VFSBIT_GETVFSSTAT, int, REALGETVFSSTAT, DUALCALL_GETVFSSTAT, \
2826 (struct statvfs *buf, size_t buflen, int flags), \
2827 (struct statvfs *, size_t, int), \
2828 (buf, buflen, flags))
2829 #endif
2830
2831 #ifdef __NetBSD__
2832 VFSCALL(VFSBIT_FHCALLS, int, REALFHOPEN, DUALCALL_FHOPEN, \
2833 (const void *fhp, size_t fh_size, int flags), \
2834 (const char *, size_t, int), \
2835 (fhp, fh_size, flags))
2836
2837 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTAT, DUALCALL_FHSTAT, \
2838 (const void *fhp, size_t fh_size, struct stat *sb), \
2839 (const char *, size_t, struct stat *), \
2840 (fhp, fh_size, sb))
2841
2842 VFSCALL(VFSBIT_FHCALLS, int, REALFHSTATVFS1, DUALCALL_FHSTATVFS1, \
2843 (const void *fhp, size_t fh_size, struct statvfs *sb, int flgs),\
2844 (const char *, size_t, struct statvfs *, int), \
2845 (fhp, fh_size, sb, flgs))
2846 #endif
2847
2848
2849 #ifdef __NetBSD__
2850
2851 /* finally, put nfssvc here. "keep the namespace clean" */
2852 #include <nfs/rpcv2.h>
2853 #include <nfs/nfs.h>
2854
2855 int
2856 nfssvc(int flags, void *argstructp)
2857 {
2858 int (*op_nfssvc)(int, void *);
2859
2860 if (vfsbits & VFSBIT_NFSSVC){
2861 struct nfsd_args *nfsdargs;
2862
2863 /* massage the socket descriptor if necessary */
2864 if (flags == NFSSVC_ADDSOCK) {
2865 nfsdargs = argstructp;
2866 nfsdargs->sock = fd_host2rump(nfsdargs->sock);
2867 }
2868 op_nfssvc = GETSYSCALL(rump, NFSSVC);
2869 } else
2870 op_nfssvc = GETSYSCALL(host, NFSSVC);
2871
2872 return op_nfssvc(flags, argstructp);
2873 }
2874 #endif /* __NetBSD__ */
2875