/*	$NetBSD: sp_common.c,v 1.32 2012/07/27 09:09:05 pooka Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Common client/server sysproxy routines.  #included.
 */
31
32 #include "rumpuser_port.h"
33
34 #include <sys/types.h>
35 #include <sys/mman.h>
36 #include <sys/queue.h>
37 #include <sys/socket.h>
38 #include <sys/un.h>
39
40 #include <arpa/inet.h>
41 #include <netinet/in.h>
42 #include <netinet/tcp.h>
43
44 #include <assert.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <inttypes.h>
48 #include <limits.h>
49 #include <poll.h>
50 #include <pthread.h>
51 #include <stdarg.h>
52 #include <stddef.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <unistd.h>
57
/*
 * XXX: NetBSD's __unused collides with Linux headers, so it can only be
 * defined after every system header has been included.  Provide the
 * GCC-attribute spelling when nothing else has defined it.
 */
#if defined(__GNUC__) && !defined(__unused)
#define __unused __attribute__((__unused__))
#endif
65
/*
 * Debug tracing.  With DEBUG defined, DPRINTF(("fmt", args...)) prints to
 * stderr via mydprintf(); otherwise DPRINTF() expands to nothing, so its
 * arguments are not evaluated.  Note the double parentheses at call sites.
 */
/* #define DEBUG */
#ifndef DEBUG
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
/* printf-style output to stderr. */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}
#endif
81
/*
 * Unless HOSTOPS is defined, the host_*() names used in this file map
 * directly onto the corresponding libc calls.
 */
#if !defined(HOSTOPS)
#define host_poll	poll
#define host_read	read
#define host_sendmsg	sendmsg
#define host_setsockopt	setsockopt
#endif
88
/*
 * Helpers for filling in struct iovec entries and sending them.
 *
 *  IOVPUT(io, b)              point io at object b; length is sizeof(b)
 *  IOVPUT_WITHSIZE(io, b, l)  point io at buffer b (a pointer); length is l
 *  SENDIOV(spc, iov)          dosend() iov[], passing __arraycount(iov)
 *                             as the number of entries
 *
 * The IOVPUT variants are wrapped in do { } while (0) so that each use is
 * exactly one statement (safe as the body of an unbraced if/else), and
 * every macro argument is parenthesized.  Use them with a trailing
 * semicolon.
 */
#define IOVPUT(_io_, _b_)						\
do {									\
	(_io_).iov_base = &(_b_);					\
	(_io_).iov_len = sizeof(_b_);					\
} while (/*CONSTCOND*/0)
#define IOVPUT_WITHSIZE(_io_, _b_, _l_)					\
do {									\
	(_io_).iov_base = (_b_);					\
	(_io_).iov_len = (_l_);						\
} while (/*CONSTCOND*/0)
#define SENDIOV(_spc_, _iov_) dosend((_spc_), (_iov_), __arraycount(_iov_))
92
/*
 * Bah, I hate writing on-off-wire conversions in C
 */

/*
 * Message class.  kickwaiter() compares rsp_hdr.rsp_class against
 * RUMPSP_ERROR.
 */
enum {
	RUMPSP_REQ,
	RUMPSP_RESP,
	RUMPSP_ERROR
};

/* Message type (presumably what rsp_hdr.rsp_type carries -- confirm). */
enum {
	RUMPSP_HANDSHAKE,
	RUMPSP_SYSCALL,
	RUMPSP_COPYIN,
	RUMPSP_COPYINSTR,
	RUMPSP_COPYOUT,
	RUMPSP_COPYOUTSTR,
	RUMPSP_ANONMMAP,
	RUMPSP_PREFORK,
	RUMPSP_RAISE
};

/* Handshake kind (presumably what rsp_hdr.rsp_handshake carries -- confirm). */
enum {
	HANDSHAKE_GUEST,
	HANDSHAKE_AUTH,
	HANDSHAKE_FORK,
	HANDSHAKE_EXEC
};

#define AUTHLEN 4 /* 128bit fork auth */
109
/*
 * Header which starts every frame.
 */
struct rsp_hdr {
	/* total frame length; readframe() requires it to be >= HDRSZ */
	uint64_t rsp_len;
	/* request number; kickwaiter() uses it to find the waiter */
	uint64_t rsp_reqno;
	uint16_t rsp_class;
	uint16_t rsp_type;
	/*
	 * We want this structure 64bit-aligned for typecast fun,
	 * so might as well use the following for something.
	 */
	union {
		uint32_t sysnum;
		uint32_t error;
		uint32_t handshake;
		uint32_t signo;
	} u;
};
#define HDRSZ		sizeof(struct rsp_hdr)

/* shorthands for the members of the trailing union */
#define rsp_sysnum	u.sysnum
#define rsp_error	u.error
#define rsp_handshake	u.handshake
#define rsp_signo	u.signo

#define MAXBANNER 96
133
/*
 * Data follows the header.  We have two types of structured data.
 */

/*
 * copyin/copyout
 *
 * rcp_data is a C99 flexible array member (instead of the non-standard
 * zero-length array); sizeof(struct rsp_copydata) still covers only the
 * fixed part, and the payload bytes follow it directly.
 */
struct rsp_copydata {
	size_t rcp_len;
	void *rcp_addr;
	uint8_t rcp_data[];
};
144
/*
 * syscall response: an error number followed by two return value slots
 */
struct rsp_sysresp {
	int rsys_error;
	register_t rsys_retval[2];
};
150
/*
 * Fork handshake data: four 32-bit words of auth data plus a cancel flag.
 */
struct handshake_fork {
	uint32_t rf_auth[4];
	int rf_cancel;
};
155
/*
 * Per-request wait record, linked onto spclient.spc_respwait while the
 * request is outstanding.
 */
struct respwait {
	/* request number, assigned in doputwait() */
	uint64_t rw_reqno;
	/* kickwaiter() stores the received buffer and its length here */
	void *rw_data;
	size_t rw_dlen;
	/* set to 1 by kickwaiter() when the matching frame has arrived */
	int rw_done;
	/* rsp_error of the frame if its class was RUMPSP_ERROR, else 0 */
	int rw_error;

	/* signalled by kickwaiter(), broadcast by kickall() */
	pthread_cond_t rw_cv;

	TAILQ_ENTRY(respwait) rw_entries;
};
167
168 struct prefork;
169 struct spclient {
170 int spc_fd;
171 int spc_refcnt;
172 int spc_state;
173
174 pthread_mutex_t spc_mtx;
175 pthread_cond_t spc_cv;
176
177 struct lwp *spc_mainlwp;
178 pid_t spc_pid;
179
180 TAILQ_HEAD(, respwait) spc_respwait;
181
182 /* rest of the fields are zeroed upon disconnect */
183 #define SPC_ZEROFF offsetof(struct spclient, spc_pfd)
184 struct pollfd *spc_pfd;
185
186 struct rsp_hdr spc_hdr;
187 uint8_t *spc_buf;
188 size_t spc_off;
189
190 uint64_t spc_nextreq;
191 uint64_t spc_syscallreq;
192 uint64_t spc_generation;
193 int spc_ostatus, spc_istatus;
194 int spc_reconnecting;
195 int spc_inexec;
196
197 LIST_HEAD(, prefork) spc_pflist;
198 };
199 #define SPCSTATUS_FREE 0
200 #define SPCSTATUS_BUSY 1
201 #define SPCSTATUS_WANTED 2
202
203 #define SPCSTATE_NEW 0
204 #define SPCSTATE_RUNNING 1
205 #define SPCSTATE_DYING 2
206
/*
 * Per-transport hooks, see parsetab:
 *  addrparse_fn   parse an address string into a malloc()ed sockaddr
 *  connecthook_fn called with a socket descriptor
 *  cleanup_fn     called with a sockaddr
 */
typedef int (*addrparse_fn)(const char *addr, struct sockaddr **sap,
	int allow_wildcard);
typedef int (*connecthook_fn)(int s);
typedef void (*cleanup_fn)(struct sockaddr *sa);

static int readframe(struct spclient *spc);
static void handlereq(struct spclient *spc);
213
214 static __inline void
215 spcresetbuf(struct spclient *spc)
216 {
217
218 spc->spc_buf = NULL;
219 spc->spc_off = 0;
220 }
221
222 static __inline void
223 spcfreebuf(struct spclient *spc)
224 {
225
226 free(spc->spc_buf);
227 spcresetbuf(spc);
228 }
229
230 static void
231 sendlockl(struct spclient *spc)
232 {
233
234 while (spc->spc_ostatus != SPCSTATUS_FREE) {
235 spc->spc_ostatus = SPCSTATUS_WANTED;
236 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
237 }
238 spc->spc_ostatus = SPCSTATUS_BUSY;
239 }
240
241 static void __unused
242 sendlock(struct spclient *spc)
243 {
244
245 pthread_mutex_lock(&spc->spc_mtx);
246 sendlockl(spc);
247 pthread_mutex_unlock(&spc->spc_mtx);
248 }
249
250 static void
251 sendunlockl(struct spclient *spc)
252 {
253
254 if (spc->spc_ostatus == SPCSTATUS_WANTED)
255 pthread_cond_broadcast(&spc->spc_cv);
256 spc->spc_ostatus = SPCSTATUS_FREE;
257 }
258
259 static void
260 sendunlock(struct spclient *spc)
261 {
262
263 pthread_mutex_lock(&spc->spc_mtx);
264 sendunlockl(spc);
265 pthread_mutex_unlock(&spc->spc_mtx);
266 }
267
268 static int
269 dosend(struct spclient *spc, struct iovec *iov, size_t iovlen)
270 {
271 struct msghdr msg;
272 struct pollfd pfd;
273 ssize_t n = 0;
274 int fd = spc->spc_fd;
275
276 pfd.fd = fd;
277 pfd.events = POLLOUT;
278
279 memset(&msg, 0, sizeof(msg));
280
281 for (;;) {
282 /* not first round? poll */
283 if (n) {
284 if (host_poll(&pfd, 1, INFTIM) == -1) {
285 if (errno == EINTR)
286 continue;
287 return errno;
288 }
289 }
290
291 msg.msg_iov = iov;
292 msg.msg_iovlen = iovlen;
293 n = host_sendmsg(fd, &msg, MSG_NOSIGNAL);
294 if (n == -1) {
295 if (errno == EPIPE)
296 return ENOTCONN;
297 if (errno != EAGAIN)
298 return errno;
299 continue;
300 }
301 if (n == 0) {
302 return ENOTCONN;
303 }
304
305 /* ok, need to adjust iovec for potential next round */
306 while (n >= (ssize_t)iov[0].iov_len && iovlen) {
307 n -= iov[0].iov_len;
308 iov++;
309 iovlen--;
310 }
311
312 if (iovlen == 0) {
313 _DIAGASSERT(n == 0);
314 break;
315 } else {
316 iov[0].iov_base = (uint8_t *)iov[0].iov_base + n;
317 iov[0].iov_len -= n;
318 }
319 }
320
321 return 0;
322 }
323
324 static void
325 doputwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
326 {
327
328 rw->rw_data = NULL;
329 rw->rw_dlen = rw->rw_done = rw->rw_error = 0;
330 pthread_cond_init(&rw->rw_cv, NULL);
331
332 pthread_mutex_lock(&spc->spc_mtx);
333 rw->rw_reqno = rhdr->rsp_reqno = spc->spc_nextreq++;
334 TAILQ_INSERT_TAIL(&spc->spc_respwait, rw, rw_entries);
335 }
336
337 static void __unused
338 putwait_locked(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
339 {
340
341 doputwait(spc, rw, rhdr);
342 pthread_mutex_unlock(&spc->spc_mtx);
343 }
344
345 static void
346 putwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
347 {
348
349 doputwait(spc, rw, rhdr);
350 sendlockl(spc);
351 pthread_mutex_unlock(&spc->spc_mtx);
352 }
353
354 static void
355 dounputwait(struct spclient *spc, struct respwait *rw)
356 {
357
358 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
359 pthread_mutex_unlock(&spc->spc_mtx);
360 pthread_cond_destroy(&rw->rw_cv);
361
362 }
363
364 static void __unused
365 unputwait_locked(struct spclient *spc, struct respwait *rw)
366 {
367
368 pthread_mutex_lock(&spc->spc_mtx);
369 dounputwait(spc, rw);
370 }
371
372 static void
373 unputwait(struct spclient *spc, struct respwait *rw)
374 {
375
376 pthread_mutex_lock(&spc->spc_mtx);
377 sendunlockl(spc);
378
379 dounputwait(spc, rw);
380 }
381
382 static void
383 kickwaiter(struct spclient *spc)
384 {
385 struct respwait *rw;
386 int error = 0;
387
388 pthread_mutex_lock(&spc->spc_mtx);
389 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) {
390 if (rw->rw_reqno == spc->spc_hdr.rsp_reqno)
391 break;
392 }
393 if (rw == NULL) {
394 DPRINTF(("no waiter found, invalid reqno %" PRIu64 "?\n",
395 spc->spc_hdr.rsp_reqno));
396 pthread_mutex_unlock(&spc->spc_mtx);
397 spcfreebuf(spc);
398 return;
399 }
400 DPRINTF(("rump_sp: client %p woke up waiter at %p\n", spc, rw));
401 rw->rw_data = spc->spc_buf;
402 rw->rw_done = 1;
403 rw->rw_dlen = (size_t)(spc->spc_off - HDRSZ);
404 if (spc->spc_hdr.rsp_class == RUMPSP_ERROR) {
405 error = rw->rw_error = spc->spc_hdr.rsp_error;
406 }
407 pthread_cond_signal(&rw->rw_cv);
408 pthread_mutex_unlock(&spc->spc_mtx);
409
410 if (error)
411 spcfreebuf(spc);
412 else
413 spcresetbuf(spc);
414 }
415
416 static void
417 kickall(struct spclient *spc)
418 {
419 struct respwait *rw;
420
421 /* DIAGASSERT(mutex_owned(spc_lock)) */
422 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries)
423 pthread_cond_broadcast(&rw->rw_cv);
424 }
425
426 static int
427 readframe(struct spclient *spc)
428 {
429 int fd = spc->spc_fd;
430 size_t left;
431 size_t framelen;
432 ssize_t n;
433
434 /* still reading header? */
435 if (spc->spc_off < HDRSZ) {
436 DPRINTF(("rump_sp: readframe getting header at offset %zu\n",
437 spc->spc_off));
438
439 left = HDRSZ - spc->spc_off;
440 /*LINTED: cast ok */
441 n = host_read(fd, (uint8_t*)&spc->spc_hdr + spc->spc_off, left);
442 if (n == 0) {
443 return -1;
444 }
445 if (n == -1) {
446 if (errno == EAGAIN)
447 return 0;
448 return -1;
449 }
450
451 spc->spc_off += n;
452 if (spc->spc_off < HDRSZ) {
453 return 0;
454 }
455
456 /*LINTED*/
457 framelen = spc->spc_hdr.rsp_len;
458
459 if (framelen < HDRSZ) {
460 return -1;
461 } else if (framelen == HDRSZ) {
462 return 1;
463 }
464
465 spc->spc_buf = malloc(framelen - HDRSZ);
466 if (spc->spc_buf == NULL) {
467 return -1;
468 }
469 memset(spc->spc_buf, 0, framelen - HDRSZ);
470
471 /* "fallthrough" */
472 } else {
473 /*LINTED*/
474 framelen = spc->spc_hdr.rsp_len;
475 }
476
477 left = framelen - spc->spc_off;
478
479 DPRINTF(("rump_sp: readframe getting body at offset %zu, left %zu\n",
480 spc->spc_off, left));
481
482 if (left == 0)
483 return 1;
484 n = host_read(fd, spc->spc_buf + (spc->spc_off - HDRSZ), left);
485 if (n == 0) {
486 return -1;
487 }
488 if (n == -1) {
489 if (errno == EAGAIN)
490 return 0;
491 return -1;
492 }
493 spc->spc_off += n;
494 left -= n;
495
496 /* got everything? */
497 if (left == 0)
498 return 1;
499 else
500 return 0;
501 }
502
/*
 * Parse a "host:port" TCP address into a freshly malloc()ed
 * struct sockaddr_in stored in *sa (the caller frees it).
 *
 * host is an IPv4 address in dotted notation, or "*" / "0" for
 * INADDR_ANY, which is accepted only if allow_wildcard is non-zero.
 * port is a decimal number which may be followed by '/' or the end of
 * the string.
 *
 * Returns 0 on success.  On failure a diagnostic is printed to stderr
 * (except for allocation failure) and an errno value is returned:
 * EINVAL for malformed input, ERANGE for a port that does not fit
 * in_port_t, ENOMEM if the allocation fails, or the errno reported by
 * inet_pton().
 *
 * The port is converted in place with strtoul() instead of being copied
 * into the fixed-size scratch buffer, which an overlong run of digits
 * used to overflow.
 */
static int
tcp_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
{
	struct sockaddr_in sin;
	char buf[64];
	const char *p;
	char *ep;
	unsigned long port;
	size_t l;
	int error;

	memset(&sin, 0, sizeof(sin));
	SA_SETLEN(&sin, sizeof(sin));
	sin.sin_family = AF_INET;

	p = strchr(addr, ':');
	if (!p) {
		fprintf(stderr, "rump_sp_tcp: missing port specifier\n");
		return EINVAL;
	}

	l = (size_t)(p - addr);
	if (l > sizeof(buf)-1) {
		fprintf(stderr, "rump_sp_tcp: address too long\n");
		return EINVAL;
	}
	memcpy(buf, addr, l);
	buf[l] = '\0';

	/* special INADDR_ANY treatment */
	if (strcmp(buf, "*") == 0 || strcmp(buf, "0") == 0) {
		sin.sin_addr.s_addr = INADDR_ANY;
	} else {
		switch (inet_pton(AF_INET, buf, &sin.sin_addr)) {
		case 1:
			break;
		case 0:
			fprintf(stderr, "rump_sp_tcp: cannot parse %s\n", buf);
			return EINVAL;
		case -1:
			/* save errno, fprintf() is allowed to clobber it */
			error = errno;
			fprintf(stderr, "rump_sp_tcp: inet_pton failed\n");
			return error;
		default:
			assert(/*CONSTCOND*/0);
			return EINVAL;
		}
	}

	if (!allow_wildcard && sin.sin_addr.s_addr == INADDR_ANY) {
		fprintf(stderr, "rump_sp_tcp: client needs !INADDR_ANY\n");
		return EINVAL;
	}

	/* advance to port number & parse */
	p++;
	l = strspn(p, "0123456789");
	if (l == 0) {
		fprintf(stderr, "rump_sp_tcp: port not found: %s\n", p);
		return EINVAL;
	}

	if (*(p+l) != '/' && *(p+l) != '\0') {
		fprintf(stderr, "rump_sp_tcp: junk at end of port: %s\n", addr);
		return EINVAL;
	}

	/*
	 * p starts with l digits, so strtoul() consumes exactly those.
	 * Overlong numbers are reported via ERANGE instead of overflowing.
	 */
	errno = 0;
	port = strtoul(p, &ep, 10);
	if (errno == ERANGE || port >= (1UL << (8*sizeof(in_port_t)))) {
		fprintf(stderr, "rump_sp_tcp: port %.*s out of range\n",
		    (int)l, p);
		return ERANGE;
	}
	sin.sin_port = htons((in_port_t)port);

	*sa = malloc(sizeof(sin));
	if (*sa == NULL)
		return ENOMEM;
	memcpy(*sa, &sin, sizeof(sin));
	return 0;
}
582
/*
 * Connect hook for TCP sockets: switch on TCP_NODELAY for socket s.
 * The result of the setsockopt call is ignored; always returns 0.
 */
static int
tcp_connecthook(int s)
{
	int nodelay = 1;

	host_setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay));
	return 0;
}
593
/*
 * "unix://<absolute path>" form of the first relative socket path seen
 * by unix_parse().  Stays empty until then.
 */
static char parsedurl[256];

/*
 * Parse a local domain socket path into a freshly malloc()ed
 * struct sockaddr_un stored in *sa (the caller frees it).
 *
 * Any pathname is accepted as is.  A relative path is prefixed with the
 * current working directory, so that the socket can still be cleaned up
 * if the server does a chdir() between now and the cleanup.  If the cwd
 * cannot be determined, a warning is printed and the relative path is
 * used unchanged.
 *
 * allow_wildcard is not used.
 *
 * Returns 0 on success, ENAMETOOLONG if the (possibly prefixed) path
 * does not fit in sun_path, or errno if the allocation fails.
 */
/*ARGSUSED*/
static int
unix_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
{
	struct sockaddr_un sun;
	const size_t addrlen = strlen(addr);
	size_t slen;
	int savepath = 0;

	(void)allow_wildcard;

	if (addrlen >= sizeof(sun.sun_path))
		return ENAMETOOLONG;

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_LOCAL;

	if (addr[0] != '/') {
		char mywd[PATH_MAX];

		if (getcwd(mywd, sizeof(mywd)) != NULL) {
			/* cwd + '/' + addr + NUL must fit */
			if (addrlen+strlen(mywd)+1 >= sizeof(sun.sun_path))
				return ENAMETOOLONG;
			strcpy(sun.sun_path, mywd);
			strcat(sun.sun_path, "/");
			savepath = 1;
		} else {
			fprintf(stderr, "warning: cannot determine cwd, "
			    "omitting socket cleanup\n");
		}
	}
	/* sun was zeroed, so this works for the empty prefix, too */
	strcat(sun.sun_path, addr);

#ifdef __linux__
	slen = sizeof(sun);
#else
	sun.sun_len = SUN_LEN(&sun);
	slen = sun.sun_len+1; /* get the 0 too */
#endif

	/* remember only the first absolute-ized path */
	if (savepath && parsedurl[0] == '\0') {
		snprintf(parsedurl, sizeof(parsedurl),
		    "unix://%s", sun.sun_path);
	}

	*sa = malloc(slen);
	if (*sa == NULL)
		return errno;
	memcpy(*sa, &sun, slen);

	return 0;
}
650
/*
 * Remove the socket file named by the sockaddr_un in sa.
 *
 * Only absolute paths are unlinked; see unix_parse(), which turns
 * relative paths into absolute ones whenever it can.  The result of
 * unlink() is not checked.
 */
static void
unix_cleanup(struct sockaddr *sa)
{
	struct sockaddr_un *sun = (void *)sa;

	if (sun->sun_path[0] != '/')
		return;

	unlink(sun->sun_path);
}
663
/*
 * Stub for unimplemented functionality: complains on stderr and
 * returns EOPNOTSUPP.
 */
/*ARGSUSED*/
static int
notsupp(void)
{

	fputs("rump_sp: support not yet implemented\n", stderr);
	return EOPNOTSUPP;
}
672
/*
 * Stub which does nothing and reports success (0).
 */
static int
success(void)
{
	const int ok = 0;

	return ok;
}
679
680 struct {
681 const char *id;
682 int domain;
683 socklen_t slen;
684 addrparse_fn ap;
685 connecthook_fn connhook;
686 cleanup_fn cleanup;
687 } parsetab[] = {
688 { "tcp", PF_INET, sizeof(struct sockaddr_in),
689 tcp_parse, tcp_connecthook, (cleanup_fn)success },
690 { "unix", PF_LOCAL, sizeof(struct sockaddr_un),
691 unix_parse, (connecthook_fn)success, unix_cleanup },
692 { "tcp6", PF_INET6, sizeof(struct sockaddr_in6),
693 (addrparse_fn)notsupp, (connecthook_fn)success,
694 (cleanup_fn)success },
695 };
696 #define NPARSE (sizeof(parsetab)/sizeof(parsetab[0]))
697
698 static int
699 parseurl(const char *url, struct sockaddr **sap, unsigned *idxp,
700 int allow_wildcard)
701 {
702 char id[16];
703 const char *p, *p2;
704 size_t l;
705 unsigned i;
706 int error;
707
708 /*
709 * Parse the url
710 */
711
712 p = url;
713 p2 = strstr(p, "://");
714 if (!p2) {
715 fprintf(stderr, "rump_sp: invalid locator ``%s''\n", p);
716 return EINVAL;
717 }
718 l = p2-p;
719 if (l > sizeof(id)-1) {
720 fprintf(stderr, "rump_sp: identifier too long in ``%s''\n", p);
721 return EINVAL;
722 }
723
724 strncpy(id, p, l);
725 id[l] = '\0';
726 p2 += 3; /* beginning of address */
727
728 for (i = 0; i < NPARSE; i++) {
729 if (strcmp(id, parsetab[i].id) == 0) {
730 error = parsetab[i].ap(p2, sap, allow_wildcard);
731 if (error)
732 return error;
733 break;
734 }
735 }
736 if (i == NPARSE) {
737 fprintf(stderr, "rump_sp: invalid identifier ``%s''\n", p);
738 return EINVAL;
739 }
740
741 *idxp = i;
742 return 0;
743 }
744