rumpuser.c revision 1.41 1 /* $NetBSD: rumpuser.c,v 1.41 2013/04/29 14:51:39 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include "rumpuser_port.h"
29
30 #if !defined(lint)
31 __RCSID("$NetBSD: rumpuser.c,v 1.41 2013/04/29 14:51:39 pooka Exp $");
32 #endif /* !lint */
33
34 #include <sys/ioctl.h>
35 #include <sys/mman.h>
36 #include <sys/uio.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39
40 #ifdef __NetBSD__
41 #include <sys/disk.h>
42 #include <sys/disklabel.h>
43 #include <sys/dkio.h>
44 #endif
45
46 #if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
47 #include <sys/sysctl.h>
48 #endif
49
50 #include <assert.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <netdb.h>
54 #include <signal.h>
55 #include <stdarg.h>
56 #include <stdint.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <time.h>
61 #include <unistd.h>
62
63 #include <rump/rumpuser.h>
64
65 #include "rumpuser_int.h"
66
67 struct rumpuser_hyperup rumpuser__hyp;
68
69 int
70 rumpuser_init(int version, const struct rumpuser_hyperup *hyp)
71 {
72
73 if (version != RUMPUSER_VERSION) {
74 fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n",
75 version, RUMPUSER_VERSION);
76 return 1;
77 }
78
79 #ifdef RUMPUSER_USE_DEVRANDOM
80 uint32_t rv;
81 int fd;
82
83 if ((fd = open("/dev/urandom", O_RDONLY)) == -1) {
84 srandom(time(NULL));
85 } else {
86 if (read(fd, &rv, sizeof(rv)) != sizeof(rv))
87 srandom(time(NULL));
88 else
89 srandom(rv);
90 close(fd);
91 }
92 #endif
93
94 rumpuser__thrinit();
95 rumpuser__hyp = *hyp;
96
97 return 0;
98 }
99
100 int
101 rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp, int *error)
102 {
103 struct stat sb;
104 uint64_t size;
105 int needsdev = 0, rv = 0, ft;
106 int fd = -1;
107
108 if (stat(path, &sb) == -1) {
109 seterror(errno);
110 return -1;
111 }
112
113 switch (sb.st_mode & S_IFMT) {
114 case S_IFDIR:
115 ft = RUMPUSER_FT_DIR;
116 break;
117 case S_IFREG:
118 ft = RUMPUSER_FT_REG;
119 break;
120 case S_IFBLK:
121 ft = RUMPUSER_FT_BLK;
122 needsdev = 1;
123 break;
124 case S_IFCHR:
125 ft = RUMPUSER_FT_CHR;
126 needsdev = 1;
127 break;
128 default:
129 ft = RUMPUSER_FT_OTHER;
130 break;
131 }
132
133 if (!needsdev) {
134 size = sb.st_size;
135 } else if (sizep) {
136 /*
137 * Welcome to the jungle. Of course querying the kernel
138 * for a device partition size is supposed to be far from
139 * trivial. On NetBSD we use ioctl. On $other platform
140 * we have a problem. We try "the lseek trick" and just
141 * fail if that fails. Platform specific code can later
142 * be written here if appropriate.
143 *
144 * On NetBSD we hope and pray that for block devices nobody
145 * else is holding them open, because otherwise the kernel
146 * will not permit us to open it. Thankfully, this is
147 * usually called only in bootstrap and then we can
148 * forget about it.
149 */
150 #ifndef __NetBSD__
151 off_t off;
152
153 fd = open(path, O_RDONLY);
154 if (fd == -1) {
155 seterror(errno);
156 rv = -1;
157 goto out;
158 }
159
160 off = lseek(fd, 0, SEEK_END);
161 if (off != 0) {
162 size = off;
163 goto out;
164 }
165 fprintf(stderr, "error: device size query not implemented on "
166 "this platform\n");
167 seterror(EOPNOTSUPP);
168 rv = -1;
169 goto out;
170 #else
171 struct disklabel lab;
172 struct partition *parta;
173 struct dkwedge_info dkw;
174
175 fd = open(path, O_RDONLY);
176 if (fd == -1) {
177 seterror(errno);
178 rv = -1;
179 goto out;
180 }
181
182 if (ioctl(fd, DIOCGDINFO, &lab) == 0) {
183 parta = &lab.d_partitions[DISKPART(sb.st_rdev)];
184 size = (uint64_t)lab.d_secsize * parta->p_size;
185 goto out;
186 }
187
188 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) {
189 /*
190 * XXX: should use DIOCGDISKINFO to query
191 * sector size, but that requires proplib,
192 * so just don't bother for now. it's nice
193 * that something as difficult as figuring out
194 * a partition's size has been made so easy.
195 */
196 size = dkw.dkw_size << DEV_BSHIFT;
197 goto out;
198 }
199
200 seterror(errno);
201 rv = -1;
202 #endif /* __NetBSD__ */
203 }
204
205 out:
206 if (rv == 0 && sizep)
207 *sizep = size;
208 if (rv == 0 && ftp)
209 *ftp = ft;
210 if (fd != -1)
211 close(fd);
212
213 return rv;
214 }
215
/*
 * Allocate "howmuch" bytes of host memory with the requested alignment.
 * An alignment of 0 selects pointer-size alignment.
 *
 * Returns the allocation, or NULL if posix_memalign() fails for any
 * reason other than EINVAL.  An alignment that posix_memalign() rejects
 * with EINVAL is treated as a caller bug: a message is printed and the
 * process aborts.
 */
void *
rumpuser_malloc(size_t howmuch, int alignment)
{
	void *mem;
	int rv;

	if (alignment == 0)
		alignment = sizeof(void *);

	rv = posix_memalign(&mem, (size_t)alignment, howmuch);
	if (__predict_false(rv != 0)) {
		if (rv != EINVAL)
			return NULL;

		printf("rumpuser_malloc: invalid alignment %d\n",
		    alignment);
		abort();
	}

	return mem;
}
237
/*
 * Release memory obtained from rumpuser_malloc().  The size argument is
 * part of the interface but is not needed by the host free().
 */
/*ARGSUSED1*/
void
rumpuser_free(void *ptr, size_t size)
{

	(void)size;
	free(ptr);
}
245
246 void *
247 rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit,
248 int exec, int *error)
249 {
250 void *rv;
251 int prot;
252
253 #ifndef MAP_ALIGNED
254 #define MAP_ALIGNED(a) 0
255 if (alignbit)
256 fprintf(stderr, "rumpuser_anonmmap: warning, requested "
257 "alignment not supported by hypervisor\n");
258 #endif
259
260 prot = PROT_READ|PROT_WRITE;
261 if (exec)
262 prot |= PROT_EXEC;
263 rv = mmap(prefaddr, size, prot,
264 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0);
265 if (rv == MAP_FAILED) {
266 seterror(errno);
267 return NULL;
268 }
269 return rv;
270 }
271
/*
 * Remove the mapping of "len" bytes at "addr".  A munmap() failure is
 * considered a programming error and trips an assertion.
 */
void
rumpuser_unmap(void *addr, size_t len)
{
	int rv = munmap(addr, len);

	assert(rv == 0);
}
280
281 int
282 rumpuser_open(const char *path, int ruflags, int *error)
283 {
284 int flags;
285
286 switch (ruflags & RUMPUSER_OPEN_ACCMODE) {
287 case RUMPUSER_OPEN_RDONLY:
288 flags = O_RDONLY;
289 break;
290 case RUMPUSER_OPEN_WRONLY:
291 flags = O_WRONLY;
292 break;
293 case RUMPUSER_OPEN_RDWR:
294 flags = O_RDWR;
295 break;
296 default:
297 *error = EINVAL;
298 return -1;
299 }
300
301 #define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_;
302 TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT);
303 TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL);
304 #undef TESTSET
305
306 DOCALL_KLOCK(int, (open(path, flags, 0644)));
307 }
308
309 int
310 rumpuser_close(int fd, int *error)
311 {
312
313 DOCALL(int, close(fd));
314 }
315
316 int
317 rumpuser_fsync(int fd, int *error)
318 {
319
320 DOCALL_KLOCK(int, fsync(fd));
321 }
322
/*
 * read() up to "size" bytes from "fd" into "data".
 *
 * Returns the read() result; when that is -1 the host errno is stored
 * via seterror().  The host call is issued inside KLOCK_WRAP() (defined
 * outside this file).
 */
ssize_t
rumpuser_read(int fd, void *data, size_t size, int *error)
{
	ssize_t rv;

	KLOCK_WRAP(rv = read(fd, data, size));
	if (rv != -1)
		return rv;

	seterror(errno);
	return -1;
}
334
/*
 * pread() up to "size" bytes from "fd" at file offset "offset" into
 * "data".
 *
 * Returns the pread() result; when that is -1 the host errno is stored
 * via seterror().  The host call is issued inside KLOCK_WRAP() (defined
 * outside this file).
 */
ssize_t
rumpuser_pread(int fd, void *data, size_t size, off_t offset, int *error)
{
	ssize_t rv;

	KLOCK_WRAP(rv = pread(fd, data, size, offset));
	if (rv != -1)
		return rv;

	seterror(errno);
	return -1;
}
346
/*
 * write() up to "size" bytes from "data" to "fd".
 *
 * Returns the write() result; when that is -1 the host errno is stored
 * via seterror().  The host call is issued inside KLOCK_WRAP() (defined
 * outside this file).
 */
ssize_t
rumpuser_write(int fd, const void *data, size_t size, int *error)
{
	ssize_t rv;

	KLOCK_WRAP(rv = write(fd, data, size));
	if (rv != -1)
		return rv;

	seterror(errno);
	return -1;
}
358
/*
 * pwrite() up to "size" bytes from "data" to "fd" at file offset
 * "offset".
 *
 * Returns the pwrite() result; when that is -1 the host errno is stored
 * via seterror().  The host call is issued inside KLOCK_WRAP() (defined
 * outside this file).
 */
ssize_t
rumpuser_pwrite(int fd, const void *data, size_t size, off_t offset, int *error)
{
	ssize_t rv;

	KLOCK_WRAP(rv = pwrite(fd, data, size, offset));
	if (rv != -1)
		return rv;

	seterror(errno);
	return -1;
}
370
371 ssize_t
372 rumpuser_readv(int fd, const struct rumpuser_iovec *riov, int iovcnt,
373 int *error)
374 {
375 struct iovec *iovp;
376 ssize_t rv;
377 int i;
378
379 iovp = malloc(iovcnt * sizeof(struct iovec));
380 if (iovp == NULL) {
381 seterror(ENOMEM);
382 return -1;
383 }
384 for (i = 0; i < iovcnt; i++) {
385 iovp[i].iov_base = riov[i].iov_base;
386 /*LINTED*/
387 iovp[i].iov_len = riov[i].iov_len;
388 }
389
390 KLOCK_WRAP(rv = readv(fd, iovp, iovcnt));
391 if (rv == -1)
392 seterror(errno);
393 free(iovp);
394
395 return rv;
396 }
397
398 ssize_t
399 rumpuser_writev(int fd, const struct rumpuser_iovec *riov, int iovcnt,
400 int *error)
401 {
402 struct iovec *iovp;
403 ssize_t rv;
404 int i;
405
406 iovp = malloc(iovcnt * sizeof(struct iovec));
407 if (iovp == NULL) {
408 seterror(ENOMEM);
409 return -1;
410 }
411 for (i = 0; i < iovcnt; i++) {
412 iovp[i].iov_base = riov[i].iov_base;
413 /*LINTED*/
414 iovp[i].iov_len = riov[i].iov_len;
415 }
416
417 KLOCK_WRAP(rv = writev(fd, iovp, iovcnt));
418 if (rv == -1)
419 seterror(errno);
420 free(iovp);
421
422 return rv;
423 }
424
425 int
426 rumpuser_clock_gettime(uint64_t *sec, uint64_t *nsec, enum rumpclock rclk)
427 {
428 struct timespec ts;
429 clockid_t clk;
430 int rv;
431
432 switch (rclk) {
433 case RUMPUSER_CLOCK_RELWALL:
434 clk = CLOCK_REALTIME;
435 break;
436 case RUMPUSER_CLOCK_ABSMONO:
437 #ifdef HAVE_CLOCK_NANOSLEEP
438 clk = CLOCK_MONOTONIC;
439 #else
440 clk = CLOCK_REALTIME;
441 #endif
442 break;
443 default:
444 abort();
445 }
446
447 rv = clock_gettime(clk, &ts);
448 if (rv == -1) {
449 return errno;
450 }
451 *sec = ts.tv_sec;
452 *nsec = ts.tv_nsec;
453
454 return 0;
455 }
456
457 int
458 rumpuser_clock_sleep(uint64_t sec, uint64_t nsec, enum rumpclock clk)
459 {
460 struct timespec rqt, rmt;
461 int nlocks;
462 int rv;
463
464 rumpkern_unsched(&nlocks, NULL);
465
466 /*LINTED*/
467 rqt.tv_sec = sec;
468 /*LINTED*/
469 rqt.tv_nsec = nsec;
470
471 switch (clk) {
472 case RUMPUSER_CLOCK_RELWALL:
473 do {
474 rv = nanosleep(&rqt, &rmt);
475 rqt = rmt;
476 } while (rv == -1 && errno == EINTR);
477 if (rv == -1) {
478 rv = errno;
479 }
480 break;
481 case RUMPUSER_CLOCK_ABSMONO:
482 do {
483 #ifdef HAVE_CLOCK_NANOSLEEP
484 rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
485 &rqt, NULL);
486 #else
487 /* le/la/der/die/das sigh. timevalspec tailspin */
488 struct timespec ts, tsr;
489 clock_gettime(CLOCK_REALTIME, &ts);
490 if (ts.tv_sec == rqt.tv_sec ?
491 ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) {
492 rv = 0;
493 } else {
494 tsr.tv_sec = rqt.tv_sec - ts.tv_sec;
495 tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec;
496 if (tsr.tv_nsec < 0) {
497 tsr.tv_sec--;
498 tsr.tv_nsec += 1000*1000*1000;
499 }
500 rv = nanosleep(&tsr, NULL);
501 }
502 #endif
503 } while (rv == -1 && errno == EINTR);
504 if (rv == -1) {
505 rv = errno;
506 }
507 break;
508 default:
509 abort();
510 }
511
512 rumpkern_sched(nlocks, NULL);
513 return rv;
514 }
515
516 int
517 rumpuser_getenv(const char *name, char *buf, size_t blen, int *error)
518 {
519
520 DOCALL(int, getenv_r(name, buf, blen));
521 }
522
/*
 * Produce a host name for the rump kernel instance.
 *
 * The name is "rump-<pid>.<hostname>.rumpdomain", or
 * "rump-<pid>.rumpdomain" when the host name cannot be obtained.  It is
 * written to "name" with snprintf(), so it is truncated to fit "namelen"
 * bytes.
 *
 * Always returns 0 and reports error code 0.
 */
int
rumpuser_gethostname(char *name, size_t namelen, int *error)
{
	char tmp[MAXHOSTNAMELEN];

	if (gethostname(tmp, sizeof(tmp)) == -1) {
		snprintf(name, namelen, "rump-%05d.rumpdomain", (int)getpid());
	} else {
		/*
		 * gethostname() need not NUL-terminate a name that was
		 * truncated to fit the buffer; make sure the "%s" below
		 * cannot run off the end of tmp.
		 */
		tmp[sizeof(tmp) - 1] = '\0';
		snprintf(name, namelen, "rump-%05d.%s.rumpdomain",
		    (int)getpid(), tmp);
	}

	/* go through seterror() like the other entry points in this file */
	seterror(0);
	return 0;
}
538
539 int
540 rumpuser_putchar(int c, int *error)
541 {
542
543 DOCALL(int, (putchar(c)));
544 }
545
546 void
547 rumpuser_exit(int rv)
548 {
549
550 if (rv == RUMPUSER_PANIC)
551 abort();
552 else
553 exit(rv);
554 }
555
/*
 * Set the calling thread's host errno to "error".
 */
void
rumpuser_seterrno(int error)
{

	errno = error;
}
562
/*
 * This is meant for safe debugging prints from the kernel.
 *
 * Formats printf-style onto the host's stderr and returns whatever
 * vfprintf() returned.
 */
int
rumpuser_dprintf(const char *format, ...)
{
	va_list args;
	int nprinted;

	va_start(args, format);
	nprinted = vfprintf(stderr, format, args);
	va_end(args);

	return nprinted;
}
578
/*
 * Deliver signal "sig" on the host.
 *
 * On NetBSD, RUMPUSER_PID_SELF raises the signal in the current process
 * and any other pid is handed to kill().  Both calls go through DOCALL(),
 * defined outside this file, which supplies the return statement.
 *
 * On every other platform the request is refused: -1 is returned with
 * EOPNOTSUPP stored via seterror().
 */
int
rumpuser_kill(int64_t pid, int sig, int *error)
{

#ifdef __NetBSD__
	if (pid != RUMPUSER_PID_SELF) {
		DOCALL(int, kill((pid_t)pid, sig));
	} else {
		DOCALL(int, raise(sig));
	}
#else
	/* XXXfixme: signal numbers may not match on non-NetBSD */
	seterror(EOPNOTSUPP);
	return -1;
#endif
}
595
/*
 * Return the number of CPUs on the host.
 *
 * The result is always at least 1: whenever the platform-specific query
 * is unavailable, fails, or yields a non-positive value, 1 is returned.
 */
int
rumpuser_getnhostcpu(void)
{
	int ncpu = 1;

#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
	size_t sz = sizeof(ncpu);

	if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1 || ncpu < 1)
		ncpu = 1;
#elif defined(__linux__) || defined(__CYGWIN__)
	FILE *fp;
	char *line = NULL;
	size_t n = 0;

	/* If anyone knows a better way, I'm all ears */
	if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
		/* one "processor" line per logical CPU */
		ncpu = 0;
		while (getline(&line, &n, fp) != -1) {
			if (strncmp(line,
			    "processor", sizeof("processor")-1) == 0)
				ncpu++;
		}
		if (ncpu == 0)
			ncpu = 1;
		free(line);
		fclose(fp);
	}
#elif defined(__sun__)
	/* XXX: this is just a rough estimate ... */
	long nonline = sysconf(_SC_NPROCESSORS_ONLN);

	/* sysconf() returns -1 on failure; never report that as a count */
	if (nonline > 0)
		ncpu = (int)nonline;
#endif

	return ncpu;
}
630
/*
 * Fill "buf" with "buflen" bytes taken from RUMPUSER_RANDOM().
 *
 * flags: accepted for interface compatibility; not used here.
 *
 * Returns buflen.
 */
size_t
rumpuser_getrandom(void *buf, size_t buflen, int flags)
{
	unsigned char *p = buf;	/* byte pointer: buf may be unaligned */
	size_t remain = buflen;
	size_t chunk;
	uint32_t tmp;

	(void)flags;

	/*
	 * Consume one 32-bit random value per up-to-4-byte chunk.  The loop
	 * ends as soon as the buffer is full, so no random value is drawn
	 * and thrown away, and the pointer never moves beyond one past the
	 * end of the buffer.
	 */
	while (remain > 0) {
		chunk = remain < sizeof(tmp) ? remain : sizeof(tmp);
		tmp = RUMPUSER_RANDOM();
		memcpy(p, &tmp, chunk);
		p += chunk;
		remain -= chunk;
	}

	return buflen;
}
649