rumpuser.c revision 1.55 1 /* $NetBSD: rumpuser.c,v 1.55 2013/10/27 16:39:46 rmind Exp $ */
2
3 /*
4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include "rumpuser_port.h"
29
30 #if !defined(lint)
31 __RCSID("$NetBSD: rumpuser.c,v 1.55 2013/10/27 16:39:46 rmind Exp $");
32 #endif /* !lint */
33
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/uio.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39
40 #ifdef __NetBSD__
41 #include <sys/disk.h>
42 #include <sys/disklabel.h>
43 #include <sys/dkio.h>
44 #endif
45
46 #if defined(__NetBSD__) || defined(__FreeBSD__) || \
47 defined(__DragonFly__) || defined(__APPLE__)
48 #define __BSD__
49 #endif
50
51 #if defined(__BSD__)
52 #include <sys/sysctl.h>
53 #endif
54
55 #include <assert.h>
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <netdb.h>
59 #include <signal.h>
60 #include <stdarg.h>
61 #include <stdint.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <time.h>
66 #include <unistd.h>
67
68 #include <rump/rumpuser.h>
69
70 #include "rumpuser_int.h"
71
/*
 * File-global copy of the struct rumpuser_hyperup that the caller hands to
 * rumpuser_init(); it is filled in there by structure assignment.
 */
72 struct rumpuser_hyperup rumpuser__hyp;
73
74 int
75 rumpuser_init(int version, const struct rumpuser_hyperup *hyp)
76 {
77
78 if (version != RUMPUSER_VERSION) {
79 fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n",
80 version, RUMPUSER_VERSION);
81 return 1;
82 }
83
84 #ifdef RUMPUSER_USE_DEVRANDOM
85 uint32_t rv;
86 int fd;
87
88 if ((fd = open("/dev/urandom", O_RDONLY)) == -1) {
89 srandom(time(NULL));
90 } else {
91 if (read(fd, &rv, sizeof(rv)) != sizeof(rv))
92 srandom(time(NULL));
93 else
94 srandom(rv);
95 close(fd);
96 }
97 #endif
98
99 rumpuser__thrinit();
100 rumpuser__hyp = *hyp;
101
102 return 0;
103 }
104
105 int
106 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp)
107 {
108 struct stat sb;
109 uint64_t size = 0;
110 int needsdev = 0, rv = 0, ft = 0;
111 int fd = -1;
112
113 if (stat(path, &sb) == -1) {
114 rv = errno;
115 goto out;
116 }
117
118 switch (sb.st_mode & S_IFMT) {
119 case S_IFDIR:
120 ft = RUMPUSER_FT_DIR;
121 break;
122 case S_IFREG:
123 ft = RUMPUSER_FT_REG;
124 break;
125 case S_IFBLK:
126 ft = RUMPUSER_FT_BLK;
127 needsdev = 1;
128 break;
129 case S_IFCHR:
130 ft = RUMPUSER_FT_CHR;
131 needsdev = 1;
132 break;
133 default:
134 ft = RUMPUSER_FT_OTHER;
135 break;
136 }
137
138 if (!needsdev) {
139 size = sb.st_size;
140 } else if (sizep) {
141 /*
142 * Welcome to the jungle. Of course querying the kernel
143 * for a device partition size is supposed to be far from
144 * trivial. On NetBSD we use ioctl. On $other platform
145 * we have a problem. We try "the lseek trick" and just
146 * fail if that fails. Platform specific code can later
147 * be written here if appropriate.
148 *
149 * On NetBSD we hope and pray that for block devices nobody
150 * else is holding them open, because otherwise the kernel
151 * will not permit us to open it. Thankfully, this is
152 * usually called only in bootstrap and then we can
153 * forget about it.
154 */
155 #ifndef __NetBSD__
156 off_t off;
157
158 fd = open(path, O_RDONLY);
159 if (fd == -1) {
160 rv = errno;
161 goto out;
162 }
163
164 off = lseek(fd, 0, SEEK_END);
165 if (off != 0) {
166 size = off;
167 goto out;
168 }
169 fprintf(stderr, "error: device size query not implemented on "
170 "this platform\n");
171 rv = EOPNOTSUPP;
172 goto out;
173 #else
174 struct disklabel lab;
175 struct partition *parta;
176 struct dkwedge_info dkw;
177
178 fd = open(path, O_RDONLY);
179 if (fd == -1) {
180 rv = errno;
181 goto out;
182 }
183
184 if (ioctl(fd, DIOCGDINFO, &lab) == 0) {
185 parta = &lab.d_partitions[DISKPART(sb.st_rdev)];
186 size = (uint64_t)lab.d_secsize * parta->p_size;
187 goto out;
188 }
189
190 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) {
191 /*
192 * XXX: should use DIOCGDISKINFO to query
193 * sector size, but that requires proplib,
194 * so just don't bother for now. it's nice
195 * that something as difficult as figuring out
196 * a partition's size has been made so easy.
197 */
198 size = dkw.dkw_size << DEV_BSHIFT;
199 goto out;
200 }
201
202 rv = errno;
203 #endif /* __NetBSD__ */
204 }
205
206 out:
207 if (rv == 0 && sizep)
208 *sizep = size;
209 if (rv == 0 && ftp)
210 *ftp = ft;
211 if (fd != -1)
212 close(fd);
213
214 ET(rv);
215 }
216
/*
 * Allocate "howmuch" bytes with the requested alignment using
 * posix_memalign().
 *
 * howmuch:   number of bytes to allocate.
 * alignment: required alignment in bytes; 0 selects sizeof(void *).
 * memp:      receives the allocation, or NULL if the allocation failed.
 *
 * An alignment rejected by posix_memalign() with EINVAL is treated as a
 * fatal error: a message is printed and the process aborts.  Any other
 * failure code is handed to ET().
 */
int
rumpuser_malloc(size_t howmuch, int alignment, void **memp)
{
	void *mem = NULL;
	int rv;

	if (alignment == 0)
		alignment = sizeof(void *);

	rv = posix_memalign(&mem, (size_t)alignment, howmuch);
	if (__predict_false(rv != 0)) {
		if (rv == EINVAL) {
			printf("rumpuser_malloc: invalid alignment %d\n",
			    alignment);
			abort();
		}
		/*
		 * The pointer is not guaranteed to be meaningful after a
		 * failed posix_memalign(); never pass garbage to the caller.
		 */
		mem = NULL;
	}

	*memp = mem;
	ET(rv);
}
238
/*
 * Hand "ptr" back to the host allocator with free().
 * "size" is not used by this implementation.
 */
/*ARGSUSED1*/
void
rumpuser_free(void *ptr, size_t size)
{

	(void)size;
	free(ptr);
}
246
247 int
248 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit,
249 int exec, void **memp)
250 {
251 void *mem;
252 int prot, rv;
253
254 #ifndef MAP_ALIGNED
255 #define MAP_ALIGNED(a) 0
256 if (alignbit)
257 fprintf(stderr, "rumpuser_anonmmap: warning, requested "
258 "alignment not supported by hypervisor\n");
259 #endif
260
261 prot = PROT_READ|PROT_WRITE;
262 if (exec)
263 prot |= PROT_EXEC;
264 mem = mmap(prefaddr, size, prot,
265 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0);
266 if (mem == MAP_FAILED) {
267 rv = errno;
268 } else {
269 *memp = mem;
270 rv = 0;
271 }
272
273 ET(rv);
274 }
275
/*
 * Remove the mapping of "len" bytes at "addr" with munmap().
 * The result of munmap() is not examined.
 */
void
rumpuser_unmap(void *addr, size_t len)
{

	(void)munmap(addr, len);
}
282
283 int
284 rumpuser_open(const char *path, int ruflags, int *fdp)
285 {
286 int fd, flags, rv;
287
288 switch (ruflags & RUMPUSER_OPEN_ACCMODE) {
289 case RUMPUSER_OPEN_RDONLY:
290 flags = O_RDONLY;
291 break;
292 case RUMPUSER_OPEN_WRONLY:
293 flags = O_WRONLY;
294 break;
295 case RUMPUSER_OPEN_RDWR:
296 flags = O_RDWR;
297 break;
298 default:
299 rv = EINVAL;
300 goto out;
301 }
302
303 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_;
304 TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT);
305 TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL);
306 #undef TESTSET
307
308 KLOCK_WRAP(fd = open(path, flags, 0644));
309 if (fd == -1) {
310 rv = errno;
311 } else {
312 *fdp = fd;
313 rv = 0;
314 }
315
316 out:
317 ET(rv);
318 }
319
/*
 * Flush and close the host descriptor "fd".
 *
 * fsync() and close() are issued between rumpkern_unsched() and
 * rumpkern_sched().  Their results are not examined; ET() is always
 * given 0.
 */
int
rumpuser_close(int fd)
{
	int heldlocks;

	rumpkern_unsched(&heldlocks, NULL);
	(void)fsync(fd);
	(void)close(fd);
	rumpkern_sched(heldlocks, NULL);

	ET(0);
}
332
333 /*
334 * Assume "struct rumpuser_iovec" and "struct iovec" are the same.
335 * If you encounter POSIX platforms where they aren't, add some
336 * translation for iovlen > 1.
337 */
338 int
339 rumpuser_iovread(int fd, struct rumpuser_iovec *ruiov, size_t iovlen,
340 int64_t roff, size_t *retp)
341 {
342 struct iovec *iov = (struct iovec *)ruiov;
343 off_t off = (off_t)roff;
344 ssize_t nn;
345 int rv;
346
347 if (off == RUMPUSER_IOV_NOSEEK) {
348 KLOCK_WRAP(nn = readv(fd, iov, iovlen));
349 } else {
350 int nlocks;
351
352 rumpkern_unsched(&nlocks, NULL);
353 if (lseek(fd, off, SEEK_SET) == off) {
354 nn = readv(fd, iov, iovlen);
355 } else {
356 nn = -1;
357 }
358 rumpkern_sched(nlocks, NULL);
359 }
360
361 if (nn == -1) {
362 rv = errno;
363 } else {
364 *retp = (size_t)nn;
365 rv = 0;
366 }
367
368 ET(rv);
369 }
370
371 int
372 rumpuser_iovwrite(int fd, const struct rumpuser_iovec *ruiov, size_t iovlen,
373 int64_t roff, size_t *retp)
374 {
375 const struct iovec *iov = (const struct iovec *)ruiov;
376 off_t off = (off_t)roff;
377 ssize_t nn;
378 int rv;
379
380 if (off == RUMPUSER_IOV_NOSEEK) {
381 KLOCK_WRAP(nn = writev(fd, iov, iovlen));
382 } else {
383 int nlocks;
384
385 rumpkern_unsched(&nlocks, NULL);
386 if (lseek(fd, off, SEEK_SET) == off) {
387 nn = writev(fd, iov, iovlen);
388 } else {
389 nn = -1;
390 }
391 rumpkern_sched(nlocks, NULL);
392 }
393
394 if (nn == -1) {
395 rv = errno;
396 } else {
397 *retp = (size_t)nn;
398 rv = 0;
399 }
400
401 ET(rv);
402 }
403
404 int
405 rumpuser_syncfd(int fd, int flags, uint64_t start, uint64_t len)
406 {
407 int rv = 0;
408
409 /*
410 * For now, assume fd is regular file and does not care
411 * about read syncing
412 */
413 if ((flags & RUMPUSER_SYNCFD_BOTH) == 0) {
414 rv = EINVAL;
415 goto out;
416 }
417 if ((flags & RUMPUSER_SYNCFD_WRITE) == 0) {
418 rv = 0;
419 goto out;
420 }
421
422 #ifdef __NetBSD__
423 {
424 int fsflags = FDATASYNC;
425
426 if (fsflags & RUMPUSER_SYNCFD_SYNC)
427 fsflags |= FDISKSYNC;
428 if (fsync_range(fd, fsflags, start, len) == -1)
429 rv = errno;
430 }
431 #else
432 /* el-simplo */
433 if (fsync(fd) == -1)
434 rv = errno;
435 #endif
436
437 out:
438 ET(rv);
439 }
440
441 int
442 rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec)
443 {
444 enum rumpclock rclk = enum_rumpclock;
445 struct timespec ts;
446 clockid_t clk;
447 int rv;
448
449 switch (rclk) {
450 case RUMPUSER_CLOCK_RELWALL:
451 clk = CLOCK_REALTIME;
452 break;
453 case RUMPUSER_CLOCK_ABSMONO:
454 #ifdef HAVE_CLOCK_NANOSLEEP
455 clk = CLOCK_MONOTONIC;
456 #else
457 clk = CLOCK_REALTIME;
458 #endif
459 break;
460 default:
461 abort();
462 }
463
464 if (clock_gettime(clk, &ts) == -1) {
465 rv = errno;
466 } else {
467 *sec = ts.tv_sec;
468 *nsec = ts.tv_nsec;
469 rv = 0;
470 }
471
472 ET(rv);
473 }
474
475 int
476 rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec)
477 {
478 enum rumpclock rclk = enum_rumpclock;
479 struct timespec rqt, rmt;
480 int nlocks;
481 int rv;
482
483 rumpkern_unsched(&nlocks, NULL);
484
485 /*LINTED*/
486 rqt.tv_sec = sec;
487 /*LINTED*/
488 rqt.tv_nsec = nsec;
489
490 switch (rclk) {
491 case RUMPUSER_CLOCK_RELWALL:
492 do {
493 rv = nanosleep(&rqt, &rmt);
494 rqt = rmt;
495 } while (rv == -1 && errno == EINTR);
496 if (rv == -1) {
497 rv = errno;
498 }
499 break;
500 case RUMPUSER_CLOCK_ABSMONO:
501 do {
502 #ifdef HAVE_CLOCK_NANOSLEEP
503 rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
504 &rqt, NULL);
505 #else
506 /* le/la/der/die/das sigh. timevalspec tailspin */
507 struct timespec ts, tsr;
508 clock_gettime(CLOCK_REALTIME, &ts);
509 if (ts.tv_sec == rqt.tv_sec ?
510 ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) {
511 rv = 0;
512 } else {
513 tsr.tv_sec = rqt.tv_sec - ts.tv_sec;
514 tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec;
515 if (tsr.tv_nsec < 0) {
516 tsr.tv_sec--;
517 tsr.tv_nsec += 1000*1000*1000;
518 }
519 rv = nanosleep(&tsr, NULL);
520 }
521 #endif
522 } while (rv == -1 && errno == EINTR);
523 if (rv == -1) {
524 rv = errno;
525 }
526 break;
527 default:
528 abort();
529 }
530
531 rumpkern_sched(nlocks, NULL);
532
533 ET(rv);
534 }
535
/*
 * Return the number of CPUs of the host, never less than 1.
 *
 * BSD-family hosts query the "hw.ncpu" sysctl.  Linux and Cygwin count
 * the lines of /proc/cpuinfo that start with "processor".  On hosts
 * where __sun__ is defined sysconf(_SC_NPROCESSORS_ONLN) is used.
 * Everywhere else, and whenever the query fails or yields a value
 * below 1, the answer is 1.
 */
static int
gethostncpu(void)
{
	int ncpu = 1;

#if defined(__BSD__)
	size_t sz = sizeof(ncpu);

	if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1)
		ncpu = 1;
#elif defined(__linux__) || defined(__CYGWIN__)
	FILE *fp;
	char *line = NULL;
	size_t n = 0;

	/* If anyone knows a better way, I'm all ears */
	if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
		ncpu = 0;
		while (getline(&line, &n, fp) != -1) {
			if (strncmp(line,
			    "processor", sizeof("processor")-1) == 0)
				ncpu++;
		}
		if (ncpu == 0)
			ncpu = 1;
		free(line);
		fclose(fp);
	}
#elif defined(__sun__)
	/* XXX: this is just a rough estimate ... */
	ncpu = sysconf(_SC_NPROCESSORS_ONLN);
#endif

	/* a failed query (e.g. sysconf() returning -1) must not leak out */
	if (ncpu < 1)
		ncpu = 1;

	return ncpu;
}
570
571 int
572 rumpuser_getparam(const char *name, void *buf, size_t blen)
573 {
574 int rv;
575
576 if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) {
577 int ncpu;
578
579 if (getenv_r("RUMP_NCPU", buf, blen) == -1) {
580 sprintf(buf, "2"); /* default */
581 } else if (strcmp(buf, "host") == 0) {
582 ncpu = gethostncpu();
583 snprintf(buf, blen, "%d", ncpu);
584 }
585 rv = 0;
586 } else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) {
587 char tmp[MAXHOSTNAMELEN];
588
589 if (gethostname(tmp, sizeof(tmp)) == -1) {
590 snprintf(buf, blen, "rump-%05d", (int)getpid());
591 } else {
592 snprintf(buf, blen, "rump-%05d.%s",
593 (int)getpid(), tmp);
594 }
595 rv = 0;
596 } else if (*name == '_') {
597 rv = EINVAL;
598 } else {
599 if (getenv_r(name, buf, blen) == -1)
600 rv = errno;
601 else
602 rv = 0;
603 }
604
605 ET(rv);
606 }
607
/*
 * Write the character "c" to standard output with putchar().
 * The result of putchar() is not examined.
 */
void
rumpuser_putchar(int c)
{

	(void)putchar(c);
}
614
615 __dead void
616 rumpuser_exit(int rv)
617 {
618
619 if (rv == RUMPUSER_PANIC)
620 abort();
621 else
622 exit(rv);
623 }
624
/*
 * Store "error" in the host's errno.
 */
void
rumpuser_seterrno(int error)
{

	errno = error;
}
631
/*
 * This is meant for safe debugging prints from the kernel.
 *
 * Takes a printf-style format string and arguments and writes the
 * formatted text to stderr with vfprintf().
 */
void
rumpuser_dprintf(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	(void)vfprintf(stderr, format, args);
	va_end(args);
}
644
/*
 * Deliver signal "sig" to process "pid".
 *
 * On NetBSD, RUMPUSER_PID_SELF is served with raise() and any other pid
 * with kill(); a result of -1 is turned into the current errno.  On all
 * other hosts the call is not implemented and EOPNOTSUPP is reported.
 *
 * The status is handed to ET().
 */
int
rumpuser_kill(int64_t pid, int sig)
{
	int rv;

#ifdef __NetBSD__
	int ret;

	if (pid != RUMPUSER_PID_SELF)
		ret = kill((pid_t)pid, sig);
	else
		ret = raise(sig);
	rv = (ret == -1) ? errno : 0;
#else
	/* XXXfixme: signal numbers may not match on non-NetBSD */
	rv = EOPNOTSUPP;
#endif

	ET(rv);
}
669
/*
 * Fill "buf" with "buflen" bytes produced by RUMPUSER_RANDOM().
 *
 * buf, buflen: destination buffer; it needs no particular alignment and
 *              a length of 0 writes nothing.
 * flags:       not used by this implementation.
 * retp:        receives the number of bytes produced, always buflen.
 *
 * ET() is always given 0.
 */
int
rumpuser_getrandom(void *buf, size_t buflen, int flags, size_t *retp)
{
	unsigned char *p = buf;
	size_t left = buflen;
	size_t chunk;
	uint32_t tmp;

	(void)flags;

	/*
	 * Copy one 32-bit random value at a time.  A byte pointer keeps
	 * the cursor valid for unaligned buffers and stops it exactly at
	 * the end of the buffer when buflen is not a multiple of four.
	 */
	while (left > 0) {
		chunk = left < sizeof(tmp) ? left : sizeof(tmp);
		tmp = RUMPUSER_RANDOM();
		memcpy(p, &tmp, chunk);
		p += chunk;
		left -= chunk;
	}

	*retp = buflen;
	ET(0);
}
689