rumpuser.c revision 1.54 1 /* $NetBSD: rumpuser.c,v 1.54 2013/08/14 08:29:25 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include "rumpuser_port.h"
29
30 #if !defined(lint)
31 __RCSID("$NetBSD: rumpuser.c,v 1.54 2013/08/14 08:29:25 pooka Exp $");
32 #endif /* !lint */
33
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/uio.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39
40 #ifdef __NetBSD__
41 #include <sys/disk.h>
42 #include <sys/disklabel.h>
43 #include <sys/dkio.h>
44 #endif
45
46 #if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
47 #include <sys/sysctl.h>
48 #endif
49
50 #include <assert.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <netdb.h>
54 #include <signal.h>
55 #include <stdarg.h>
56 #include <stdint.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <time.h>
61 #include <unistd.h>
62
63 #include <rump/rumpuser.h>
64
65 #include "rumpuser_int.h"
66
67 struct rumpuser_hyperup rumpuser__hyp;
68
69 int
70 rumpuser_init(int version, const struct rumpuser_hyperup *hyp)
71 {
72
73 if (version != RUMPUSER_VERSION) {
74 fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n",
75 version, RUMPUSER_VERSION);
76 return 1;
77 }
78
79 #ifdef RUMPUSER_USE_DEVRANDOM
80 uint32_t rv;
81 int fd;
82
83 if ((fd = open("/dev/urandom", O_RDONLY)) == -1) {
84 srandom(time(NULL));
85 } else {
86 if (read(fd, &rv, sizeof(rv)) != sizeof(rv))
87 srandom(time(NULL));
88 else
89 srandom(rv);
90 close(fd);
91 }
92 #endif
93
94 rumpuser__thrinit();
95 rumpuser__hyp = *hyp;
96
97 return 0;
98 }
99
100 int
101 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp)
102 {
103 struct stat sb;
104 uint64_t size = 0;
105 int needsdev = 0, rv = 0, ft = 0;
106 int fd = -1;
107
108 if (stat(path, &sb) == -1) {
109 rv = errno;
110 goto out;
111 }
112
113 switch (sb.st_mode & S_IFMT) {
114 case S_IFDIR:
115 ft = RUMPUSER_FT_DIR;
116 break;
117 case S_IFREG:
118 ft = RUMPUSER_FT_REG;
119 break;
120 case S_IFBLK:
121 ft = RUMPUSER_FT_BLK;
122 needsdev = 1;
123 break;
124 case S_IFCHR:
125 ft = RUMPUSER_FT_CHR;
126 needsdev = 1;
127 break;
128 default:
129 ft = RUMPUSER_FT_OTHER;
130 break;
131 }
132
133 if (!needsdev) {
134 size = sb.st_size;
135 } else if (sizep) {
136 /*
137 * Welcome to the jungle. Of course querying the kernel
138 * for a device partition size is supposed to be far from
139 * trivial. On NetBSD we use ioctl. On $other platform
140 * we have a problem. We try "the lseek trick" and just
141 * fail if that fails. Platform specific code can later
142 * be written here if appropriate.
143 *
144 * On NetBSD we hope and pray that for block devices nobody
145 * else is holding them open, because otherwise the kernel
146 * will not permit us to open it. Thankfully, this is
147 * usually called only in bootstrap and then we can
148 * forget about it.
149 */
150 #ifndef __NetBSD__
151 off_t off;
152
153 fd = open(path, O_RDONLY);
154 if (fd == -1) {
155 rv = errno;
156 goto out;
157 }
158
159 off = lseek(fd, 0, SEEK_END);
160 if (off != 0) {
161 size = off;
162 goto out;
163 }
164 fprintf(stderr, "error: device size query not implemented on "
165 "this platform\n");
166 rv = EOPNOTSUPP;
167 goto out;
168 #else
169 struct disklabel lab;
170 struct partition *parta;
171 struct dkwedge_info dkw;
172
173 fd = open(path, O_RDONLY);
174 if (fd == -1) {
175 rv = errno;
176 goto out;
177 }
178
179 if (ioctl(fd, DIOCGDINFO, &lab) == 0) {
180 parta = &lab.d_partitions[DISKPART(sb.st_rdev)];
181 size = (uint64_t)lab.d_secsize * parta->p_size;
182 goto out;
183 }
184
185 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) {
186 /*
187 * XXX: should use DIOCGDISKINFO to query
188 * sector size, but that requires proplib,
189 * so just don't bother for now. it's nice
190 * that something as difficult as figuring out
191 * a partition's size has been made so easy.
192 */
193 size = dkw.dkw_size << DEV_BSHIFT;
194 goto out;
195 }
196
197 rv = errno;
198 #endif /* __NetBSD__ */
199 }
200
201 out:
202 if (rv == 0 && sizep)
203 *sizep = size;
204 if (rv == 0 && ftp)
205 *ftp = ft;
206 if (fd != -1)
207 close(fd);
208
209 ET(rv);
210 }
211
/*
 * Allocate `howmuch' bytes aligned to `alignment' bytes.
 *
 * alignment: requested alignment; 0 selects sizeof(void *).
 * memp:      receives the allocation, or NULL if the allocation failed.
 *
 * An alignment rejected by posix_memalign() with EINVAL is treated as
 * a programming error and aborts the process.  Any other
 * posix_memalign() result is returned through ET().
 */
int
rumpuser_malloc(size_t howmuch, int alignment, void **memp)
{
	void *mem = NULL;
	int rv;

	if (alignment == 0)
		alignment = sizeof(void *);

	rv = posix_memalign(&mem, (size_t)alignment, howmuch);
	if (__predict_false(rv != 0)) {
		if (rv == EINVAL) {
			/* stderr: stdout may never be flushed before abort() */
			fprintf(stderr,
			    "rumpuser_malloc: invalid alignment %d\n",
			    alignment);
			abort();
		}
		/*
		 * The pointer is not guaranteed to have been set on
		 * failure; hand back a well-defined value.
		 */
		mem = NULL;
	}

	*memp = mem;
	ET(rv);
}
233
/*
 * Release memory by passing `ptr' to free().  The `size' argument is
 * accepted for interface symmetry but is not used by this
 * implementation.
 */
/*ARGSUSED1*/
void
rumpuser_free(void *ptr, size_t size)
{

	(void)size;
	free(ptr);
}
241
242 int
243 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit,
244 int exec, void **memp)
245 {
246 void *mem;
247 int prot, rv;
248
249 #ifndef MAP_ALIGNED
250 #define MAP_ALIGNED(a) 0
251 if (alignbit)
252 fprintf(stderr, "rumpuser_anonmmap: warning, requested "
253 "alignment not supported by hypervisor\n");
254 #endif
255
256 prot = PROT_READ|PROT_WRITE;
257 if (exec)
258 prot |= PROT_EXEC;
259 mem = mmap(prefaddr, size, prot,
260 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0);
261 if (mem == MAP_FAILED) {
262 rv = errno;
263 } else {
264 *memp = mem;
265 rv = 0;
266 }
267
268 ET(rv);
269 }
270
/*
 * Remove the mapping of `len' bytes starting at `addr'.  The result of
 * munmap() is not reported to the caller.
 */
void
rumpuser_unmap(void *addr, size_t len)
{

	(void)munmap(addr, len);
}
277
278 int
279 rumpuser_open(const char *path, int ruflags, int *fdp)
280 {
281 int fd, flags, rv;
282
283 switch (ruflags & RUMPUSER_OPEN_ACCMODE) {
284 case RUMPUSER_OPEN_RDONLY:
285 flags = O_RDONLY;
286 break;
287 case RUMPUSER_OPEN_WRONLY:
288 flags = O_WRONLY;
289 break;
290 case RUMPUSER_OPEN_RDWR:
291 flags = O_RDWR;
292 break;
293 default:
294 rv = EINVAL;
295 goto out;
296 }
297
298 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_;
299 TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT);
300 TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL);
301 #undef TESTSET
302
303 KLOCK_WRAP(fd = open(path, flags, 0644));
304 if (fd == -1) {
305 rv = errno;
306 } else {
307 *fdp = fd;
308 rv = 0;
309 }
310
311 out:
312 ET(rv);
313 }
314
/*
 * Flush and close the host file descriptor `fd'.
 *
 * Both host calls are made between rumpkern_unsched() and
 * rumpkern_sched().  Their results are not inspected: the function
 * always reports success through ET(0).
 */
int
rumpuser_close(int fd)
{
	int nlocks;

	rumpkern_unsched(&nlocks, NULL);
	(void)fsync(fd);
	(void)close(fd);
	rumpkern_sched(nlocks, NULL);

	ET(0);
}
327
328 /*
329 * Assume "struct rumpuser_iovec" and "struct iovec" are the same.
330 * If you encounter POSIX platforms where they aren't, add some
331 * translation for iovlen > 1.
332 */
333 int
334 rumpuser_iovread(int fd, struct rumpuser_iovec *ruiov, size_t iovlen,
335 int64_t roff, size_t *retp)
336 {
337 struct iovec *iov = (struct iovec *)ruiov;
338 off_t off = (off_t)roff;
339 ssize_t nn;
340 int rv;
341
342 if (off == RUMPUSER_IOV_NOSEEK) {
343 KLOCK_WRAP(nn = readv(fd, iov, iovlen));
344 } else {
345 int nlocks;
346
347 rumpkern_unsched(&nlocks, NULL);
348 if (lseek(fd, off, SEEK_SET) == off) {
349 nn = readv(fd, iov, iovlen);
350 } else {
351 nn = -1;
352 }
353 rumpkern_sched(nlocks, NULL);
354 }
355
356 if (nn == -1) {
357 rv = errno;
358 } else {
359 *retp = (size_t)nn;
360 rv = 0;
361 }
362
363 ET(rv);
364 }
365
366 int
367 rumpuser_iovwrite(int fd, const struct rumpuser_iovec *ruiov, size_t iovlen,
368 int64_t roff, size_t *retp)
369 {
370 const struct iovec *iov = (const struct iovec *)ruiov;
371 off_t off = (off_t)roff;
372 ssize_t nn;
373 int rv;
374
375 if (off == RUMPUSER_IOV_NOSEEK) {
376 KLOCK_WRAP(nn = writev(fd, iov, iovlen));
377 } else {
378 int nlocks;
379
380 rumpkern_unsched(&nlocks, NULL);
381 if (lseek(fd, off, SEEK_SET) == off) {
382 nn = writev(fd, iov, iovlen);
383 } else {
384 nn = -1;
385 }
386 rumpkern_sched(nlocks, NULL);
387 }
388
389 if (nn == -1) {
390 rv = errno;
391 } else {
392 *retp = (size_t)nn;
393 rv = 0;
394 }
395
396 ET(rv);
397 }
398
399 int
400 rumpuser_syncfd(int fd, int flags, uint64_t start, uint64_t len)
401 {
402 int rv = 0;
403
404 /*
405 * For now, assume fd is regular file and does not care
406 * about read syncing
407 */
408 if ((flags & RUMPUSER_SYNCFD_BOTH) == 0) {
409 rv = EINVAL;
410 goto out;
411 }
412 if ((flags & RUMPUSER_SYNCFD_WRITE) == 0) {
413 rv = 0;
414 goto out;
415 }
416
417 #ifdef __NetBSD__
418 {
419 int fsflags = FDATASYNC;
420
421 if (fsflags & RUMPUSER_SYNCFD_SYNC)
422 fsflags |= FDISKSYNC;
423 if (fsync_range(fd, fsflags, start, len) == -1)
424 rv = errno;
425 }
426 #else
427 /* el-simplo */
428 if (fsync(fd) == -1)
429 rv = errno;
430 #endif
431
432 out:
433 ET(rv);
434 }
435
436 int
437 rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec)
438 {
439 enum rumpclock rclk = enum_rumpclock;
440 struct timespec ts;
441 clockid_t clk;
442 int rv;
443
444 switch (rclk) {
445 case RUMPUSER_CLOCK_RELWALL:
446 clk = CLOCK_REALTIME;
447 break;
448 case RUMPUSER_CLOCK_ABSMONO:
449 #ifdef HAVE_CLOCK_NANOSLEEP
450 clk = CLOCK_MONOTONIC;
451 #else
452 clk = CLOCK_REALTIME;
453 #endif
454 break;
455 default:
456 abort();
457 }
458
459 if (clock_gettime(clk, &ts) == -1) {
460 rv = errno;
461 } else {
462 *sec = ts.tv_sec;
463 *nsec = ts.tv_nsec;
464 rv = 0;
465 }
466
467 ET(rv);
468 }
469
470 int
471 rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec)
472 {
473 enum rumpclock rclk = enum_rumpclock;
474 struct timespec rqt, rmt;
475 int nlocks;
476 int rv;
477
478 rumpkern_unsched(&nlocks, NULL);
479
480 /*LINTED*/
481 rqt.tv_sec = sec;
482 /*LINTED*/
483 rqt.tv_nsec = nsec;
484
485 switch (rclk) {
486 case RUMPUSER_CLOCK_RELWALL:
487 do {
488 rv = nanosleep(&rqt, &rmt);
489 rqt = rmt;
490 } while (rv == -1 && errno == EINTR);
491 if (rv == -1) {
492 rv = errno;
493 }
494 break;
495 case RUMPUSER_CLOCK_ABSMONO:
496 do {
497 #ifdef HAVE_CLOCK_NANOSLEEP
498 rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
499 &rqt, NULL);
500 #else
501 /* le/la/der/die/das sigh. timevalspec tailspin */
502 struct timespec ts, tsr;
503 clock_gettime(CLOCK_REALTIME, &ts);
504 if (ts.tv_sec == rqt.tv_sec ?
505 ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) {
506 rv = 0;
507 } else {
508 tsr.tv_sec = rqt.tv_sec - ts.tv_sec;
509 tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec;
510 if (tsr.tv_nsec < 0) {
511 tsr.tv_sec--;
512 tsr.tv_nsec += 1000*1000*1000;
513 }
514 rv = nanosleep(&tsr, NULL);
515 }
516 #endif
517 } while (rv == -1 && errno == EINTR);
518 if (rv == -1) {
519 rv = errno;
520 }
521 break;
522 default:
523 abort();
524 }
525
526 rumpkern_sched(nlocks, NULL);
527
528 ET(rv);
529 }
530
/*
 * Return the number of CPUs on the host, always at least 1.
 *
 * NetBSD/FreeBSD/DragonFly query the "hw.ncpu" sysctl, Linux/Cygwin
 * count the lines of /proc/cpuinfo that begin with "processor", and
 * Solaris asks sysconf().  On other platforms, or when the query
 * fails or produces a value below 1, the answer is 1.
 */
static int
gethostncpu(void)
{
	int ncpu = 1;

#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
	size_t sz = sizeof(ncpu);

	if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1)
		ncpu = 1;
#elif defined(__linux__) || defined(__CYGWIN__)
	static const char key[] = "processor";
	char chunk[256];
	int linestart = 1;
	FILE *fp;

	/* If anyone knows a better way, I'm all ears */
	if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
		ncpu = 0;
		while (fgets(chunk, sizeof(chunk), fp) != NULL) {
			/*
			 * A long line arrives in several chunks; only
			 * the chunk that starts a line may be matched.
			 */
			if (linestart &&
			    strncmp(chunk, key, sizeof(key) - 1) == 0)
				ncpu++;
			linestart = strchr(chunk, '\n') != NULL;
		}
		fclose(fp);
	}
#elif defined(__sun__)
	/* XXX: this is just a rough estimate ... */
	ncpu = (int)sysconf(_SC_NPROCESSORS_ONLN);
#endif

	/* never report fewer than one CPU, whatever the host said */
	if (ncpu < 1)
		ncpu = 1;

	return ncpu;
}
565
566 int
567 rumpuser_getparam(const char *name, void *buf, size_t blen)
568 {
569 int rv;
570
571 if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) {
572 int ncpu;
573
574 if (getenv_r("RUMP_NCPU", buf, blen) == -1) {
575 sprintf(buf, "2"); /* default */
576 } else if (strcmp(buf, "host") == 0) {
577 ncpu = gethostncpu();
578 snprintf(buf, blen, "%d", ncpu);
579 }
580 rv = 0;
581 } else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) {
582 char tmp[MAXHOSTNAMELEN];
583
584 if (gethostname(tmp, sizeof(tmp)) == -1) {
585 snprintf(buf, blen, "rump-%05d", (int)getpid());
586 } else {
587 snprintf(buf, blen, "rump-%05d.%s",
588 (int)getpid(), tmp);
589 }
590 rv = 0;
591 } else if (*name == '_') {
592 rv = EINVAL;
593 } else {
594 if (getenv_r(name, buf, blen) == -1)
595 rv = errno;
596 else
597 rv = 0;
598 }
599
600 ET(rv);
601 }
602
/*
 * Write the character `c' to standard output.  The result of the
 * write is not reported to the caller.
 */
void
rumpuser_putchar(int c)
{

	(void)putchar(c);
}
609
610 void
611 rumpuser_exit(int rv)
612 {
613
614 if (rv == RUMPUSER_PANIC)
615 abort();
616 else
617 exit(rv);
618 }
619
/*
 * Store `error' in the host's errno.
 */
void
rumpuser_seterrno(int error)
{

	errno = error;
}
626
/*
 * This is meant for safe debugging prints from the kernel.
 *
 * Formats the arguments printf-style according to `format' and writes
 * the result to stderr.
 */
void
rumpuser_dprintf(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	(void)vfprintf(stderr, format, args);
	va_end(args);
}
639
/*
 * Deliver signal `sig' to a host process.
 *
 * On NetBSD, a pid of RUMPUSER_PID_SELF raises the signal in the
 * calling process and any other pid is passed to kill().  On other
 * platforms the call is not supported and EOPNOTSUPP results.
 *
 * The result (0 or a host errno value) is returned through ET().
 */
int
rumpuser_kill(int64_t pid, int sig)
{
	int rv;

#ifdef __NetBSD__
	int error;

	error = (pid == RUMPUSER_PID_SELF) ?
	    raise(sig) : kill((pid_t)pid, sig);
	rv = (error == -1) ? errno : 0;
#else
	/* XXXfixme: signal numbers may not match on non-NetBSD */
	rv = EOPNOTSUPP;
#endif

	ET(rv);
}
664
/*
 * Fill `buf' with `buflen' bytes taken from RUMPUSER_RANDOM().
 *
 * flags: currently ignored.
 * retp:  receives the number of bytes produced, which is always
 *        buflen.
 *
 * Always reports success through ET(0).
 */
int
rumpuser_getrandom(void *buf, size_t buflen, int flags, size_t *retp)
{
	unsigned char *p = buf;
	size_t left = buflen;

	(void)flags;

	/*
	 * Copy up to four bytes per generated value.  Advancing a byte
	 * pointer by the amount actually written keeps `p' inside the
	 * buffer even when buflen is not a multiple of four.
	 */
	while (left > 0) {
		uint32_t tmp = RUMPUSER_RANDOM();
		size_t chunk = left < sizeof(tmp) ? left : sizeof(tmp);

		memcpy(p, &tmp, chunk);
		p += chunk;
		left -= chunk;
	}

	*retp = buflen;
	ET(0);
}
684