rumpuser.c revision 1.29 1 /* $NetBSD: rumpuser.c,v 1.29 2013/03/18 21:00:52 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include "rumpuser_port.h"
29
30 #if !defined(lint)
31 __RCSID("$NetBSD: rumpuser.c,v 1.29 2013/03/18 21:00:52 pooka Exp $");
32 #endif /* !lint */
33
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/uio.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39
40 #ifdef __NetBSD__
41 #include <sys/disk.h>
42 #include <sys/disklabel.h>
43 #include <sys/dkio.h>
44 #include <sys/event.h>
45 #endif
46
47 #if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
48 #include <sys/sysctl.h>
49 #endif
50
51 #include <assert.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <netdb.h>
55 #include <poll.h>
56 #include <signal.h>
57 #include <stdarg.h>
58 #include <stdint.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <time.h>
63 #include <unistd.h>
64
65 #include <rump/rumpuser.h>
66
67 #include "rumpuser_int.h"
68
69 int
70 rumpuser_getversion(void)
71 {
72
73 return RUMPUSER_VERSION;
74 }
75
76 int
77 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp, int *error)
78 {
79 struct stat sb;
80 uint64_t size;
81 int needsdev = 0, rv = 0, ft;
82 int fd = -1;
83
84 if (stat(path, &sb) == -1) {
85 seterror(errno);
86 return -1;
87 }
88
89 switch (sb.st_mode & S_IFMT) {
90 case S_IFDIR:
91 ft = RUMPUSER_FT_DIR;
92 break;
93 case S_IFREG:
94 ft = RUMPUSER_FT_REG;
95 break;
96 case S_IFBLK:
97 ft = RUMPUSER_FT_BLK;
98 needsdev = 1;
99 break;
100 case S_IFCHR:
101 ft = RUMPUSER_FT_CHR;
102 needsdev = 1;
103 break;
104 default:
105 ft = RUMPUSER_FT_OTHER;
106 break;
107 }
108
109 if (!needsdev) {
110 size = sb.st_size;
111 } else if (sizep) {
112 /*
113 * Welcome to the jungle. Of course querying the kernel
114 * for a device partition size is supposed to be far from
115 * trivial. On NetBSD we use ioctl. On $other platform
116 * we have a problem. We try "the lseek trick" and just
117 * fail if that fails. Platform specific code can later
118 * be written here if appropriate.
119 *
120 * On NetBSD we hope and pray that for block devices nobody
121 * else is holding them open, because otherwise the kernel
122 * will not permit us to open it. Thankfully, this is
123 * usually called only in bootstrap and then we can
124 * forget about it.
125 */
126 #ifndef __NetBSD__
127 off_t off;
128
129 fd = open(path, O_RDONLY);
130 if (fd == -1) {
131 seterror(errno);
132 rv = -1;
133 goto out;
134 }
135
136 off = lseek(fd, 0, SEEK_END);
137 if (off != 0) {
138 size = off;
139 goto out;
140 }
141 fprintf(stderr, "error: device size query not implemented on "
142 "this platform\n");
143 seterror(EOPNOTSUPP);
144 rv = -1;
145 goto out;
146 #else
147 struct disklabel lab;
148 struct partition *parta;
149 struct dkwedge_info dkw;
150
151 fd = open(path, O_RDONLY);
152 if (fd == -1) {
153 seterror(errno);
154 rv = -1;
155 goto out;
156 }
157
158 if (ioctl(fd, DIOCGDINFO, &lab) == 0) {
159 parta = &lab.d_partitions[DISKPART(sb.st_rdev)];
160 size = (uint64_t)lab.d_secsize * parta->p_size;
161 goto out;
162 }
163
164 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) {
165 /*
166 * XXX: should use DIOCGDISKINFO to query
167 * sector size, but that requires proplib,
168 * so just don't bother for now. it's nice
169 * that something as difficult as figuring out
170 * a partition's size has been made so easy.
171 */
172 size = dkw.dkw_size << DEV_BSHIFT;
173 goto out;
174 }
175
176 seterror(errno);
177 rv = -1;
178 #endif /* __NetBSD__ */
179 }
180
181 out:
182 if (rv == 0 && sizep)
183 *sizep = size;
184 if (rv == 0 && ftp)
185 *ftp = ft;
186 if (fd != -1)
187 close(fd);
188
189 return rv;
190 }
191
/*
 * Sleep for *sec seconds plus *nsec nanoseconds.
 *
 * Returns the result of nanosleep(); when that is -1 the errno value is
 * reported with seterror().  In every case the remaining time is written
 * back through `sec' and `nsec'; it is zero unless the host filled in
 * the remainder (which it is only required to do for an interrupted
 * sleep).
 */
int
rumpuser_nanosleep(uint64_t *sec, uint64_t *nsec, int *error)
{
	struct timespec rqt, rmt;
	int rv;

	/*LINTED*/
	rqt.tv_sec = *sec;
	/*LINTED*/
	rqt.tv_nsec = *nsec;

	/*
	 * nanosleep() need not touch the remainder when the sleep runs to
	 * completion or fails with something other than EINTR, so start
	 * from a defined value instead of reading back stack garbage.
	 */
	rmt.tv_sec = 0;
	rmt.tv_nsec = 0;

	KLOCK_WRAP(rv = nanosleep(&rqt, &rmt));
	if (rv == -1)
		seterror(errno);

	*sec = rmt.tv_sec;
	*nsec = rmt.tv_nsec;

	return rv;
}
212
/*
 * Allocate `howmuch' bytes aligned to `alignment' bytes.
 *
 * An alignment of 0, or any other non-negative value smaller than
 * sizeof(void *), is raised to sizeof(void *), which is the minimum
 * posix_memalign() accepts.  An alignment posix_memalign() still rejects
 * with EINVAL is treated as a programming error: a message is printed
 * and the process aborts.
 *
 * Returns the allocation, or NULL if posix_memalign() failed for any
 * other reason.
 */
void *
rumpuser_malloc(size_t howmuch, int alignment)
{
	void *mem;
	int rv;

	/*
	 * posix_memalign() requires a multiple of sizeof(void *); small
	 * requests are trivially satisfied by the stricter alignment.
	 * Negative values are left alone so they still trip the EINVAL
	 * check below.
	 */
	if (alignment >= 0 && (size_t)alignment < sizeof(void *))
		alignment = sizeof(void *);

	rv = posix_memalign(&mem, (size_t)alignment, howmuch);
	if (__predict_false(rv != 0)) {
		if (rv == EINVAL) {
			printf("rumpuser_malloc: invalid alignment %d\n",
			    alignment);
			abort();
		}
		mem = NULL;
	}

	return mem;
}
234
/*
 * Resize the allocation `ptr' to `howmuch' bytes using the host
 * realloc() and hand back whatever it returns.
 */
void *
rumpuser_realloc(void *ptr, size_t howmuch)
{
	void *resized = realloc(ptr, howmuch);

	return resized;
}
241
/*
 * Release memory with the host free().
 */
void
rumpuser_free(void *ptr)
{
	/* passed straight through; free() itself accepts NULL */
	free(ptr);
}
248
249 void *
250 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit,
251 int exec, int *error)
252 {
253 void *rv;
254 int prot;
255
256 #ifndef MAP_ALIGNED
257 #define MAP_ALIGNED(a) 0
258 if (alignbit)
259 fprintf(stderr, "rumpuser_anonmmap: warning, requested "
260 "alignment not supported by hypervisor\n");
261 #endif
262
263 prot = PROT_READ|PROT_WRITE;
264 if (exec)
265 prot |= PROT_EXEC;
266 rv = mmap(prefaddr, size, prot,
267 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0);
268 if (rv == MAP_FAILED) {
269 seterror(errno);
270 return NULL;
271 }
272 return rv;
273 }
274
/*
 * Unmap `len' bytes at `addr'.  A failing munmap() is only caught by
 * the assertion; nothing is reported to the caller.
 */
void
rumpuser_unmap(void *addr, size_t len)
{
	int res = munmap(addr, len);

	assert(res == 0);
}
283
284 void *
285 rumpuser_filemmap(int fd, off_t offset, size_t len, int flags, int *error)
286 {
287 void *rv;
288 int mmflags, prot;
289
290 if (flags & RUMPUSER_FILEMMAP_TRUNCATE) {
291 if (ftruncate(fd, offset + len) == -1) {
292 seterror(errno);
293 return NULL;
294 }
295 }
296
297 /* it's implicit */
298 #if defined(__sun__) && !defined(MAP_FILE)
299 #define MAP_FILE 0
300 #endif
301
302 mmflags = MAP_FILE;
303 if (flags & RUMPUSER_FILEMMAP_SHARED)
304 mmflags |= MAP_SHARED;
305 else
306 mmflags |= MAP_PRIVATE;
307
308 prot = 0;
309 if (flags & RUMPUSER_FILEMMAP_READ)
310 prot |= PROT_READ;
311 if (flags & RUMPUSER_FILEMMAP_WRITE)
312 prot |= PROT_WRITE;
313
314 rv = mmap(NULL, len, PROT_READ|PROT_WRITE, mmflags, fd, offset);
315 if (rv == MAP_FAILED) {
316 seterror(errno);
317 return NULL;
318 }
319
320 seterror(0);
321 return rv;
322 }
323
324 int
325 rumpuser_memsync(void *addr, size_t len, int *error)
326 {
327
328 DOCALL_KLOCK(int, (msync(addr, len, MS_SYNC)));
329 }
330
331 int
332 rumpuser_open(const char *path, int ruflags, int *error)
333 {
334 int flags;
335
336 switch (ruflags & RUMPUSER_OPEN_ACCMODE) {
337 case RUMPUSER_OPEN_RDONLY:
338 flags = O_RDONLY;
339 break;
340 case RUMPUSER_OPEN_WRONLY:
341 flags = O_WRONLY;
342 break;
343 case RUMPUSER_OPEN_RDWR:
344 flags = O_RDWR;
345 break;
346 default:
347 *error = EINVAL;
348 return -1;
349 }
350
351 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_;
352 TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT);
353 TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL);
354 #ifdef O_DIRECT
355 TESTSET(RUMPUSER_OPEN_DIRECT, O_DIRECT);
356 #else
357 if (ruflags & RUMPUSER_OPEN_DIRECT) {
358 *error = EOPNOTSUPP;
359 return -1;
360 }
361 #endif
362 #undef TESTSET
363
364 DOCALL_KLOCK(int, (open(path, flags, 0644)));
365 }
366
367 int
368 rumpuser_ioctl(int fd, u_long cmd, void *data, int *error)
369 {
370
371 DOCALL_KLOCK(int, (ioctl(fd, cmd, data)));
372 }
373
374 int
375 rumpuser_close(int fd, int *error)
376 {
377
378 DOCALL(int, close(fd));
379 }
380
381 int
382 rumpuser_fsync(int fd, int *error)
383 {
384
385 DOCALL_KLOCK(int, fsync(fd));
386 }
387
/*
 * read() up to `size' bytes from `fd' into `data'.
 *
 * Returns what read() returned; when that is -1 the errno value is
 * reported with seterror().  The call is wrapped in KLOCK_WRAP(), which
 * is defined outside this file.
 */
ssize_t
rumpuser_read(int fd, void *data, size_t size, int *error)
{
	ssize_t nread;

	KLOCK_WRAP(nread = read(fd, data, size));
	if (nread != -1)
		return nread;

	seterror(errno);
	return -1;
}
399
/*
 * pread() up to `size' bytes from `fd' at `offset' into `data'.
 *
 * Returns what pread() returned; when that is -1 the errno value is
 * reported with seterror().  The call is wrapped in KLOCK_WRAP(), which
 * is defined outside this file.
 */
ssize_t
rumpuser_pread(int fd, void *data, size_t size, off_t offset, int *error)
{
	ssize_t nread;

	KLOCK_WRAP(nread = pread(fd, data, size, offset));
	if (nread != -1)
		return nread;

	seterror(errno);
	return -1;
}
411
412 void
413 rumpuser_read_bio(int fd, void *data, size_t size, off_t offset,
414 rump_biodone_fn biodone, void *biodonecookie)
415 {
416 ssize_t rv;
417 int error = 0;
418
419 rv = rumpuser_pread(fd, data, size, offset, &error);
420 /* check against <0 instead of ==-1 to get typing below right */
421 if (rv < 0)
422 rv = 0;
423
424 /* LINTED: see above */
425 biodone(biodonecookie, rv, error);
426 }
427
/*
 * write() `size' bytes from `data' to `fd'.
 *
 * Returns what write() returned; when that is -1 the errno value is
 * reported with seterror().  The call is wrapped in KLOCK_WRAP(), which
 * is defined outside this file.
 */
ssize_t
rumpuser_write(int fd, const void *data, size_t size, int *error)
{
	ssize_t nwritten;

	KLOCK_WRAP(nwritten = write(fd, data, size));
	if (nwritten != -1)
		return nwritten;

	seterror(errno);
	return -1;
}
439
/*
 * pwrite() `size' bytes from `data' to `fd' at `offset'.
 *
 * Returns what pwrite() returned; when that is -1 the errno value is
 * reported with seterror().  The call is wrapped in KLOCK_WRAP(), which
 * is defined outside this file.
 */
ssize_t
rumpuser_pwrite(int fd, const void *data, size_t size, off_t offset, int *error)
{
	ssize_t nwritten;

	KLOCK_WRAP(nwritten = pwrite(fd, data, size, offset));
	if (nwritten != -1)
		return nwritten;

	seterror(errno);
	return -1;
}
451
452 void
453 rumpuser_write_bio(int fd, const void *data, size_t size, off_t offset,
454 rump_biodone_fn biodone, void *biodonecookie)
455 {
456 ssize_t rv;
457 int error = 0;
458
459 rv = rumpuser_pwrite(fd, data, size, offset, &error);
460 /* check against <0 instead of ==-1 to get typing below right */
461 if (rv < 0)
462 rv = 0;
463
464 /* LINTED: see above */
465 biodone(biodonecookie, rv, error);
466 }
467
468 ssize_t
469 rumpuser_readv(int fd, const struct rumpuser_iovec *riov, int iovcnt,
470 int *error)
471 {
472 struct iovec *iovp;
473 ssize_t rv;
474 int i;
475
476 iovp = malloc(iovcnt * sizeof(struct iovec));
477 if (iovp == NULL) {
478 seterror(ENOMEM);
479 return -1;
480 }
481 for (i = 0; i < iovcnt; i++) {
482 iovp[i].iov_base = riov[i].iov_base;
483 /*LINTED*/
484 iovp[i].iov_len = riov[i].iov_len;
485 }
486
487 KLOCK_WRAP(rv = readv(fd, iovp, iovcnt));
488 if (rv == -1)
489 seterror(errno);
490 free(iovp);
491
492 return rv;
493 }
494
495 ssize_t
496 rumpuser_writev(int fd, const struct rumpuser_iovec *riov, int iovcnt,
497 int *error)
498 {
499 struct iovec *iovp;
500 ssize_t rv;
501 int i;
502
503 iovp = malloc(iovcnt * sizeof(struct iovec));
504 if (iovp == NULL) {
505 seterror(ENOMEM);
506 return -1;
507 }
508 for (i = 0; i < iovcnt; i++) {
509 iovp[i].iov_base = riov[i].iov_base;
510 /*LINTED*/
511 iovp[i].iov_len = riov[i].iov_len;
512 }
513
514 KLOCK_WRAP(rv = writev(fd, iovp, iovcnt));
515 if (rv == -1)
516 seterror(errno);
517 free(iovp);
518
519 return rv;
520 }
521
522 int
523 rumpuser_gettime(uint64_t *sec, uint64_t *nsec, int *error)
524 {
525 struct timeval tv;
526 int rv;
527
528 rv = gettimeofday(&tv, NULL);
529 if (rv == -1) {
530 seterror(errno);
531 return rv;
532 }
533
534 *sec = tv.tv_sec;
535 *nsec = tv.tv_usec * 1000;
536
537 return 0;
538 }
539
540 int
541 rumpuser_getenv(const char *name, char *buf, size_t blen, int *error)
542 {
543
544 DOCALL(int, getenv_r(name, buf, blen));
545 }
546
/*
 * Produce a host name for this rump instance in the `namelen'-byte
 * buffer `name'.
 *
 * The result is "rump-<pid>.<host name>.rumpdomain", or
 * "rump-<pid>.rumpdomain" if the host name cannot be obtained.  Output
 * is truncated by snprintf() to fit `namelen'.
 *
 * Always returns 0 and reports error 0.
 */
int
rumpuser_gethostname(char *name, size_t namelen, int *error)
{
	char tmp[MAXHOSTNAMELEN];

	if (gethostname(tmp, sizeof(tmp)) == -1) {
		snprintf(name, namelen, "rump-%05d.rumpdomain", (int)getpid());
	} else {
		/*
		 * gethostname() is not required to NUL-terminate a name
		 * that had to be truncated; terminate it ourselves before
		 * handing it to %s.
		 */
		tmp[sizeof(tmp) - 1] = '\0';
		snprintf(name, namelen, "rump-%05d.%s.rumpdomain",
		    (int)getpid(), tmp);
	}

	seterror(0);
	return 0;
}
562
563 int
564 rumpuser_poll(struct pollfd *fds, int nfds, int timeout, int *error)
565 {
566
567 DOCALL_KLOCK(int, (poll(fds, (nfds_t)nfds, timeout)));
568 }
569
570 int
571 rumpuser_putchar(int c, int *error)
572 {
573
574 DOCALL(int, (putchar(c)));
575 }
576
577 void
578 rumpuser_exit(int rv)
579 {
580
581 if (rv == RUMPUSER_PANIC)
582 abort();
583 else
584 exit(rv);
585 }
586
/*
 * Set the host's errno to `error'.
 */
void
rumpuser_seterrno(int error)
{
	/* plain assignment: no validation of the value is done */
	errno = error;
}
593
594 /*
595 * On NetBSD we use kqueue, on Linux we use inotify. The underlying
596 * interface requirements aren't quite the same, but we have a very
597 * good chance of doing the fd->path mapping on Linux thanks to dcache,
598 * so just keep the existing interfaces for now.
599 */
600 #if defined(__NetBSD__)
601 int
602 rumpuser_writewatchfile_setup(int kq, int fd, intptr_t opaque, int *error)
603 {
604 struct kevent kev;
605
606 if (kq == -1) {
607 kq = kqueue();
608 if (kq == -1) {
609 seterror(errno);
610 return -1;
611 }
612 }
613
614 EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD|EV_ENABLE|EV_CLEAR,
615 NOTE_WRITE, 0, opaque);
616 if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1) {
617 seterror(errno);
618 return -1;
619 }
620
621 return kq;
622 }
623
624 int
625 rumpuser_writewatchfile_wait(int kq, intptr_t *opaque, int *error)
626 {
627 struct kevent kev;
628 int rv;
629
630 again:
631 KLOCK_WRAP(rv = kevent(kq, NULL, 0, &kev, 1, NULL));
632 if (rv == -1) {
633 if (errno == EINTR)
634 goto again;
635 seterror(errno);
636 return -1;
637 }
638
639 if (opaque)
640 *opaque = kev.udata;
641 return rv;
642 }
643
644 #elif defined(__linux__)
645 #include <sys/inotify.h>
646
647 int
648 rumpuser_writewatchfile_setup(int inotify, int fd, intptr_t notused, int *error)
649 {
650 char procbuf[PATH_MAX], linkbuf[PATH_MAX];
651 ssize_t nn;
652
653 if (inotify == -1) {
654 inotify = inotify_init();
655 if (inotify == -1) {
656 seterror(errno);
657 return -1;
658 }
659 }
660
661 /* ok, need to map fd into path for inotify */
662 snprintf(procbuf, sizeof(procbuf), "/proc/self/fd/%d", fd);
663 nn = readlink(procbuf, linkbuf, sizeof(linkbuf)-1);
664 if (nn >= (ssize_t)sizeof(linkbuf)-1) {
665 nn = -1;
666 errno = E2BIG; /* pick something */
667 }
668 if (nn == -1) {
669 seterror(errno);
670 close(inotify);
671 return -1;
672 }
673
674 linkbuf[nn] = '\0';
675 if (inotify_add_watch(inotify, linkbuf, IN_MODIFY) == -1) {
676 seterror(errno);
677 close(inotify);
678 return -1;
679 }
680
681 return inotify;
682 }
683
684 int
685 rumpuser_writewatchfile_wait(int kq, intptr_t *opaque, int *error)
686 {
687 struct inotify_event iev;
688 ssize_t nn;
689
690 do {
691 KLOCK_WRAP(nn = read(kq, &iev, sizeof(iev)));
692 } while (errno == EINTR);
693
694 if (nn == -1) {
695 seterror(errno);
696 return -1;
697 }
698 return (nn/sizeof(iev));
699 }
700
701 #else
702
703 /* a polling default implementation */
/*
 * Fallback for hosts with neither kqueue nor inotify: nothing is
 * registered.  The first call prints a one-time warning to stderr; all
 * arguments are ignored and 0 is always returned.
 */
int
rumpuser_writewatchfile_setup(int inotify, int fd, intptr_t notused, int *error)
{
	static int warned = 0;

	if (warned)
		return 0;

	fprintf(stderr, "WARNING: rumpuser writewatchfile routines are "
	    "polling-only on this platform\n");
	warned = 1;

	return 0;
}
717
/*
 * Fallback "wait": there is nothing to wait on, so sleep for 10000
 * microseconds and return 0, leaving it to the caller to poll.  All
 * arguments are ignored.  The sleep is wrapped in KLOCK_WRAP(), which
 * is defined outside this file.
 */
int
rumpuser_writewatchfile_wait(int kq, intptr_t *opaque, int *error)
{
	enum { POLL_INTERVAL_USEC = 10000 };

	KLOCK_WRAP(usleep(POLL_INTERVAL_USEC));

	return 0;
}
725 #endif
726
/*
 * This is meant for safe debugging prints from the kernel.
 *
 * Formats the printf-style arguments to the host's stderr and returns
 * whatever vfprintf() returned.
 */
int
rumpuser_dprintf(const char *format, ...)
{
	va_list args;
	int nprinted;

	va_start(args, format);
	nprinted = vfprintf(stderr, format, args);
	va_end(args);

	return nprinted;
}
742
/*
 * Deliver signal `sig' to the host process `pid'.
 *
 * Only implemented on NetBSD: RUMPUSER_PID_SELF is delivered with
 * raise(), anything else with kill().  Both go through DOCALL(), which
 * is defined outside this file and is expected to report the outcome
 * through `error' and to supply the return statement.
 *
 * On every other host the request is refused: EOPNOTSUPP is reported
 * with seterror() and -1 is returned.
 */
int
rumpuser_kill(int64_t pid, int sig, int *error)
{

#ifndef __NetBSD__
	/* XXXfixme: signal numbers may not match on non-NetBSD */
	seterror(EOPNOTSUPP);
	return -1;
#else
	if (pid != RUMPUSER_PID_SELF) {
		DOCALL(int, kill((pid_t)pid, sig));
	} else {
		DOCALL(int, raise(sig));
	}
#endif
}
759
/*
 * Return the number of CPUs on the host, never less than 1.
 *
 * NetBSD/FreeBSD/DragonFly: the hw.ncpu sysctl.
 * Linux/Cygwin: the number of lines in /proc/cpuinfo that begin with
 *   "processor".
 * Solaris: sysconf(_SC_NPROCESSORS_ONLN).
 * Elsewhere, or whenever the query fails or yields a non-positive
 * count: 1.
 */
int
rumpuser_getnhostcpu(void)
{
	int ncpu = 1;

#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
	size_t sz = sizeof(ncpu);

	/* don't trust the buffer contents if the query failed */
	if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1)
		ncpu = 1;
#elif defined(__linux__) || defined(__CYGWIN__)
	FILE *fp;
	char *line = NULL;
	size_t n = 0;

	/* If anyone knows a better way, I'm all ears */
	if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
		ncpu = 0;
		while (getline(&line, &n, fp) != -1) {
			if (strncmp(line,
			    "processor", sizeof("processor")-1) == 0)
				ncpu++;
		}
		free(line);
		fclose(fp);
	}
#elif __sun__
	/* XXX: this is just a rough estimate ... */
	ncpu = (int)sysconf(_SC_NPROCESSORS_ONLN);
#endif

	/*
	 * Covers an empty cpuinfo, sysconf() returning -1 and any other
	 * nonsensical answer: callers always get a usable count.
	 */
	if (ncpu < 1)
		ncpu = 1;

	return ncpu;
}
794
/*
 * Return a 32-bit value obtained from the host's arc4random().
 *
 * XXX: this hypercall needs a better name
 */
uint32_t
rumpuser_arc4random(void)
{
	uint32_t value = arc4random();

	return value;
}
802