rumpuser.c revision 1.39 1 /* $NetBSD: rumpuser.c,v 1.39 2013/04/29 13:19:11 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include "rumpuser_port.h"
29
30 #if !defined(lint)
31 __RCSID("$NetBSD: rumpuser.c,v 1.39 2013/04/29 13:19:11 pooka Exp $");
32 #endif /* !lint */
33
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/uio.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39
40 #ifdef __NetBSD__
41 #include <sys/disk.h>
42 #include <sys/disklabel.h>
43 #include <sys/dkio.h>
44 #endif
45
46 #if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
47 #include <sys/sysctl.h>
48 #endif
49
50 #include <assert.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <netdb.h>
54 #include <signal.h>
55 #include <stdarg.h>
56 #include <stdint.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <time.h>
61 #include <unistd.h>
62
63 #include <rump/rumpuser.h>
64
65 #include "rumpuser_int.h"
66
67 rump_unschedulefn rumpuser__unschedule;
68 rump_reschedulefn rumpuser__reschedule;
69
70 int
71 rumpuser_init(int version,
72 rump_reschedulefn rumpkern_resched, rump_unschedulefn rumpkern_unsched)
73 {
74
75 if (version != RUMPUSER_VERSION) {
76 fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n",
77 version, RUMPUSER_VERSION);
78 return 1;
79 }
80
81 #ifdef RUMPUSER_USE_DEVRANDOM
82 uint32_t rv;
83 int fd;
84
85 if ((fd = open("/dev/urandom", O_RDONLY)) == -1) {
86 srandom(time(NULL));
87 } else {
88 if (read(fd, &rv, sizeof(rv)) != sizeof(rv))
89 srandom(time(NULL));
90 else
91 srandom(rv);
92 close(fd);
93 }
94 #endif
95
96 rumpuser__thrinit();
97
98 rumpuser__unschedule = rumpkern_unsched;
99 rumpuser__reschedule = rumpkern_resched;
100
101 return 0;
102 }
103
104 int
105 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp, int *error)
106 {
107 struct stat sb;
108 uint64_t size;
109 int needsdev = 0, rv = 0, ft;
110 int fd = -1;
111
112 if (stat(path, &sb) == -1) {
113 seterror(errno);
114 return -1;
115 }
116
117 switch (sb.st_mode & S_IFMT) {
118 case S_IFDIR:
119 ft = RUMPUSER_FT_DIR;
120 break;
121 case S_IFREG:
122 ft = RUMPUSER_FT_REG;
123 break;
124 case S_IFBLK:
125 ft = RUMPUSER_FT_BLK;
126 needsdev = 1;
127 break;
128 case S_IFCHR:
129 ft = RUMPUSER_FT_CHR;
130 needsdev = 1;
131 break;
132 default:
133 ft = RUMPUSER_FT_OTHER;
134 break;
135 }
136
137 if (!needsdev) {
138 size = sb.st_size;
139 } else if (sizep) {
140 /*
141 * Welcome to the jungle. Of course querying the kernel
142 * for a device partition size is supposed to be far from
143 * trivial. On NetBSD we use ioctl. On $other platform
144 * we have a problem. We try "the lseek trick" and just
145 * fail if that fails. Platform specific code can later
146 * be written here if appropriate.
147 *
148 * On NetBSD we hope and pray that for block devices nobody
149 * else is holding them open, because otherwise the kernel
150 * will not permit us to open it. Thankfully, this is
151 * usually called only in bootstrap and then we can
152 * forget about it.
153 */
154 #ifndef __NetBSD__
155 off_t off;
156
157 fd = open(path, O_RDONLY);
158 if (fd == -1) {
159 seterror(errno);
160 rv = -1;
161 goto out;
162 }
163
164 off = lseek(fd, 0, SEEK_END);
165 if (off != 0) {
166 size = off;
167 goto out;
168 }
169 fprintf(stderr, "error: device size query not implemented on "
170 "this platform\n");
171 seterror(EOPNOTSUPP);
172 rv = -1;
173 goto out;
174 #else
175 struct disklabel lab;
176 struct partition *parta;
177 struct dkwedge_info dkw;
178
179 fd = open(path, O_RDONLY);
180 if (fd == -1) {
181 seterror(errno);
182 rv = -1;
183 goto out;
184 }
185
186 if (ioctl(fd, DIOCGDINFO, &lab) == 0) {
187 parta = &lab.d_partitions[DISKPART(sb.st_rdev)];
188 size = (uint64_t)lab.d_secsize * parta->p_size;
189 goto out;
190 }
191
192 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) {
193 /*
194 * XXX: should use DIOCGDISKINFO to query
195 * sector size, but that requires proplib,
196 * so just don't bother for now. it's nice
197 * that something as difficult as figuring out
198 * a partition's size has been made so easy.
199 */
200 size = dkw.dkw_size << DEV_BSHIFT;
201 goto out;
202 }
203
204 seterror(errno);
205 rv = -1;
206 #endif /* __NetBSD__ */
207 }
208
209 out:
210 if (rv == 0 && sizep)
211 *sizep = size;
212 if (rv == 0 && ftp)
213 *ftp = ft;
214 if (fd != -1)
215 close(fd);
216
217 return rv;
218 }
219
/*
 * Allocate howmuch bytes with the requested alignment using
 * posix_memalign().  An alignment of 0 is replaced by sizeof(void *).
 *
 * Returns the allocation, or NULL if posix_memalign() fails for any
 * reason other than EINVAL.  An alignment rejected with EINVAL is
 * treated as a caller bug: a message is printed and the process aborts.
 */
void *
rumpuser_malloc(size_t howmuch, int alignment)
{
	void *mem;
	int error;

	if (alignment == 0)
		alignment = sizeof(void *);

	error = posix_memalign(&mem, (size_t)alignment, howmuch);
	if (__predict_false(error != 0)) {
		if (error == EINVAL) {
			printf("rumpuser_malloc: invalid alignment %d\n",
			    alignment);
			abort();
		}
		return NULL;
	}

	return mem;
}
241
/*
 * Release memory with free().  The size argument is accepted for the
 * sake of the interface but is not needed by the host allocator.
 */
/*ARGSUSED1*/
void
rumpuser_free(void *ptr, size_t size)
{

	(void)size;
	free(ptr);
}
249
250 void *
251 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit,
252 int exec, int *error)
253 {
254 void *rv;
255 int prot;
256
257 #ifndef MAP_ALIGNED
258 #define MAP_ALIGNED(a) 0
259 if (alignbit)
260 fprintf(stderr, "rumpuser_anonmmap: warning, requested "
261 "alignment not supported by hypervisor\n");
262 #endif
263
264 prot = PROT_READ|PROT_WRITE;
265 if (exec)
266 prot |= PROT_EXEC;
267 rv = mmap(prefaddr, size, prot,
268 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0);
269 if (rv == MAP_FAILED) {
270 seterror(errno);
271 return NULL;
272 }
273 return rv;
274 }
275
/*
 * Remove the mapping [addr, addr + len) with munmap().  Failure is
 * considered a programming error and is caught by assert().
 */
void
rumpuser_unmap(void *addr, size_t len)
{
	int unmapped;

	unmapped = munmap(addr, len);
	assert(unmapped == 0);
}
284
285 int
286 rumpuser_open(const char *path, int ruflags, int *error)
287 {
288 int flags;
289
290 switch (ruflags & RUMPUSER_OPEN_ACCMODE) {
291 case RUMPUSER_OPEN_RDONLY:
292 flags = O_RDONLY;
293 break;
294 case RUMPUSER_OPEN_WRONLY:
295 flags = O_WRONLY;
296 break;
297 case RUMPUSER_OPEN_RDWR:
298 flags = O_RDWR;
299 break;
300 default:
301 *error = EINVAL;
302 return -1;
303 }
304
305 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_;
306 TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT);
307 TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL);
308 #ifdef O_DIRECT
309 TESTSET(RUMPUSER_OPEN_DIRECT, O_DIRECT);
310 #else
311 if (ruflags & RUMPUSER_OPEN_DIRECT) {
312 *error = EOPNOTSUPP;
313 return -1;
314 }
315 #endif
316 #undef TESTSET
317
318 DOCALL_KLOCK(int, (open(path, flags, 0644)));
319 }
320
321 int
322 rumpuser_close(int fd, int *error)
323 {
324
325 DOCALL(int, close(fd));
326 }
327
328 int
329 rumpuser_fsync(int fd, int *error)
330 {
331
332 DOCALL_KLOCK(int, fsync(fd));
333 }
334
/*
 * read(2) from a host file descriptor; the call is made inside
 * KLOCK_WRAP().
 *
 * Returns whatever read() returned.  When that is -1, errno is passed
 * to seterror().
 */
ssize_t
rumpuser_read(int fd, void *data, size_t size, int *error)
{
	ssize_t nread;

	KLOCK_WRAP(nread = read(fd, data, size));
	if (nread != -1)
		return nread;

	seterror(errno);
	return nread;
}
346
/*
 * pread(2) from a host file descriptor at the given offset; the call
 * is made inside KLOCK_WRAP().
 *
 * Returns whatever pread() returned.  When that is -1, errno is passed
 * to seterror().
 */
ssize_t
rumpuser_pread(int fd, void *data, size_t size, off_t offset, int *error)
{
	ssize_t nread;

	KLOCK_WRAP(nread = pread(fd, data, size, offset));
	if (nread != -1)
		return nread;

	seterror(errno);
	return nread;
}
358
/*
 * write(2) to a host file descriptor; the call is made inside
 * KLOCK_WRAP().
 *
 * Returns whatever write() returned.  When that is -1, errno is passed
 * to seterror().
 */
ssize_t
rumpuser_write(int fd, const void *data, size_t size, int *error)
{
	ssize_t nwritten;

	KLOCK_WRAP(nwritten = write(fd, data, size));
	if (nwritten != -1)
		return nwritten;

	seterror(errno);
	return nwritten;
}
370
/*
 * pwrite(2) to a host file descriptor at the given offset; the call is
 * made inside KLOCK_WRAP().
 *
 * Returns whatever pwrite() returned.  When that is -1, errno is passed
 * to seterror().
 */
ssize_t
rumpuser_pwrite(int fd, const void *data, size_t size, off_t offset, int *error)
{
	ssize_t nwritten;

	KLOCK_WRAP(nwritten = pwrite(fd, data, size, offset));
	if (nwritten != -1)
		return nwritten;

	seterror(errno);
	return nwritten;
}
382
383 ssize_t
384 rumpuser_readv(int fd, const struct rumpuser_iovec *riov, int iovcnt,
385 int *error)
386 {
387 struct iovec *iovp;
388 ssize_t rv;
389 int i;
390
391 iovp = malloc(iovcnt * sizeof(struct iovec));
392 if (iovp == NULL) {
393 seterror(ENOMEM);
394 return -1;
395 }
396 for (i = 0; i < iovcnt; i++) {
397 iovp[i].iov_base = riov[i].iov_base;
398 /*LINTED*/
399 iovp[i].iov_len = riov[i].iov_len;
400 }
401
402 KLOCK_WRAP(rv = readv(fd, iovp, iovcnt));
403 if (rv == -1)
404 seterror(errno);
405 free(iovp);
406
407 return rv;
408 }
409
410 ssize_t
411 rumpuser_writev(int fd, const struct rumpuser_iovec *riov, int iovcnt,
412 int *error)
413 {
414 struct iovec *iovp;
415 ssize_t rv;
416 int i;
417
418 iovp = malloc(iovcnt * sizeof(struct iovec));
419 if (iovp == NULL) {
420 seterror(ENOMEM);
421 return -1;
422 }
423 for (i = 0; i < iovcnt; i++) {
424 iovp[i].iov_base = riov[i].iov_base;
425 /*LINTED*/
426 iovp[i].iov_len = riov[i].iov_len;
427 }
428
429 KLOCK_WRAP(rv = writev(fd, iovp, iovcnt));
430 if (rv == -1)
431 seterror(errno);
432 free(iovp);
433
434 return rv;
435 }
436
437 int
438 rumpuser_clock_gettime(uint64_t *sec, uint64_t *nsec, enum rumpclock rclk)
439 {
440 struct timespec ts;
441 clockid_t clk;
442 int rv;
443
444 switch (rclk) {
445 case RUMPUSER_CLOCK_RELWALL:
446 clk = CLOCK_REALTIME;
447 break;
448 case RUMPUSER_CLOCK_ABSMONO:
449 #ifdef HAVE_CLOCK_NANOSLEEP
450 clk = CLOCK_MONOTONIC;
451 #else
452 clk = CLOCK_REALTIME;
453 #endif
454 break;
455 default:
456 abort();
457 }
458
459 rv = clock_gettime(clk, &ts);
460 if (rv == -1) {
461 return errno;
462 }
463 *sec = ts.tv_sec;
464 *nsec = ts.tv_nsec;
465
466 return 0;
467 }
468
469 int
470 rumpuser_clock_sleep(uint64_t sec, uint64_t nsec, enum rumpclock clk)
471 {
472 struct timespec rqt, rmt;
473 int nlocks;
474 int rv;
475
476 rumpuser__unschedule(0, &nlocks, NULL);
477
478 /*LINTED*/
479 rqt.tv_sec = sec;
480 /*LINTED*/
481 rqt.tv_nsec = nsec;
482
483 switch (clk) {
484 case RUMPUSER_CLOCK_RELWALL:
485 do {
486 rv = nanosleep(&rqt, &rmt);
487 rqt = rmt;
488 } while (rv == -1 && errno == EINTR);
489 if (rv == -1) {
490 rv = errno;
491 }
492 break;
493 case RUMPUSER_CLOCK_ABSMONO:
494 do {
495 #ifdef HAVE_CLOCK_NANOSLEEP
496 rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
497 &rqt, NULL);
498 #else
499 /* le/la/der/die/das sigh. timevalspec tailspin */
500 struct timespec ts, tsr;
501 clock_gettime(CLOCK_REALTIME, &ts);
502 if (ts.tv_sec == rqt.tv_sec ?
503 ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) {
504 rv = 0;
505 } else {
506 tsr.tv_sec = rqt.tv_sec - ts.tv_sec;
507 tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec;
508 if (tsr.tv_nsec < 0) {
509 tsr.tv_sec--;
510 tsr.tv_nsec += 1000*1000*1000;
511 }
512 rv = nanosleep(&tsr, NULL);
513 }
514 #endif
515 } while (rv == -1 && errno == EINTR);
516 if (rv == -1) {
517 rv = errno;
518 }
519 break;
520 default:
521 abort();
522 }
523
524 rumpuser__reschedule(nlocks, NULL);
525 return rv;
526 }
527
528 int
529 rumpuser_getenv(const char *name, char *buf, size_t blen, int *error)
530 {
531
532 DOCALL(int, getenv_r(name, buf, blen));
533 }
534
/*
 * Construct a host name for the rump kernel in name (namelen bytes).
 *
 * The result is "rump-<pid>.<host name>.rumpdomain", or
 * "rump-<pid>.rumpdomain" if the host name cannot be determined, where
 * <pid> is the process id printed with at least five digits.  The
 * output is truncated by snprintf() if name is too small.
 *
 * Always returns 0 and reports error 0.
 */
int
rumpuser_gethostname(char *name, size_t namelen, int *error)
{
	char tmp[MAXHOSTNAMELEN];

	if (gethostname(tmp, sizeof(tmp)) == -1) {
		snprintf(name, namelen, "rump-%05d.rumpdomain", (int)getpid());
	} else {
		/*
		 * gethostname() is not required to NUL-terminate a name
		 * it had to truncate; make sure %s stops inside tmp.
		 */
		tmp[sizeof(tmp) - 1] = '\0';
		snprintf(name, namelen, "rump-%05d.%s.rumpdomain",
		    (int)getpid(), tmp);
	}

	seterror(0);
	return 0;
}
550
551 int
552 rumpuser_putchar(int c, int *error)
553 {
554
555 DOCALL(int, (putchar(c)));
556 }
557
558 void
559 rumpuser_exit(int rv)
560 {
561
562 if (rv == RUMPUSER_PANIC)
563 abort();
564 else
565 exit(rv);
566 }
567
/*
 * Set the calling thread's host errno to the given value.
 */
void
rumpuser_seterrno(int error)
{
	int newval = error;

	errno = newval;
}
574
/*
 * Debugging printf intended to be safe to call from the kernel.
 * Formats the arguments like printf() and writes the result to the
 * host's stderr.  Returns the value returned by vfprintf().
 */
int
rumpuser_dprintf(const char *format, ...)
{
	va_list args;
	int nprinted;

	va_start(args, format);
	nprinted = vfprintf(stderr, format, args);
	va_end(args);

	return nprinted;
}
590
/*
 * Deliver signal sig to a host process.
 *
 * Only supported on NetBSD: a pid of RUMPUSER_PID_SELF uses raise(),
 * any other pid uses kill().  Both are issued through DOCALL(), which
 * is defined elsewhere; NOTE(review): the macro presumably returns the
 * result of the call and reports errno through error -- confirm
 * against its definition.
 *
 * On every other host the function fails with -1 and EOPNOTSUPP.
 */
int
rumpuser_kill(int64_t pid, int sig, int *error)
{

#ifndef __NetBSD__
	/* XXXfixme: signal numbers may not match on non-NetBSD */
	seterror(EOPNOTSUPP);
	return -1;
#else
	if (pid != RUMPUSER_PID_SELF) {
		DOCALL(int, (kill((pid_t)pid, sig)));
	} else {
		DOCALL(int, (raise(sig)));
	}
#endif
}
607
/*
 * Return the number of CPUs of the host, always at least 1.
 *
 * NetBSD, FreeBSD and DragonFly read the hw.ncpu sysctl, Linux and
 * Cygwin count the lines of /proc/cpuinfo that start with "processor",
 * and Solaris asks sysconf() for the number of online processors.
 * Unknown platforms, and any failure to obtain a sensible count,
 * yield 1.
 */
int
rumpuser_getnhostcpu(void)
{
	int ncpu = 1;

#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
	size_t sz = sizeof(ncpu);

	if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1)
		ncpu = 1;
#elif defined(__linux__) || defined(__CYGWIN__)
	FILE *fp;
	char *line = NULL;
	size_t n = 0;

	/* If anyone knows a better way, I'm all ears */
	if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
		ncpu = 0;
		while (getline(&line, &n, fp) != -1) {
			if (strncmp(line,
			    "processor", sizeof("processor")-1) == 0)
				ncpu++;
		}
		free(line);
		fclose(fp);
	}
#elif defined(__sun__)
	/* XXX: this is just a rough estimate ... */
	ncpu = sysconf(_SC_NPROCESSORS_ONLN);
#endif

	/*
	 * Covers an empty match in /proc/cpuinfo as well as an error
	 * return (-1) from sysconf(): never report fewer than one CPU.
	 */
	if (ncpu < 1)
		ncpu = 1;

	return ncpu;
}
642
/*
 * Fill buf with buflen bytes obtained from RUMPUSER_RANDOM().
 *
 * Each RUMPUSER_RANDOM() result is stored in a uint32_t and copied out
 * up to four bytes at a time; the final chunk may be shorter.  buf may
 * have any alignment and nothing is touched when buflen is 0.  flags
 * is currently ignored.
 *
 * Returns buflen.
 */
size_t
rumpuser_getrandom(void *buf, size_t buflen, int flags)
{
	unsigned char *dst = buf;
	size_t left = buflen;

	(void)flags;

	while (left > 0) {
		uint32_t word = RUMPUSER_RANDOM();
		size_t chunk = left < sizeof(word) ? left : sizeof(word);

		/* byte-wise copy: no alignment requirement on buf */
		memcpy(dst, &word, chunk);
		dst += chunk;
		left -= chunk;
	}

	return buflen;
}
661