sp_common.c revision 1.27 1 /* $NetBSD: sp_common.c,v 1.27 2011/02/14 14:56:23 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Common client/server sysproxy routines. #included.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <sys/types.h>
35 #include <sys/mman.h>
36 #include <sys/queue.h>
37 #include <sys/socket.h>
38 #include <sys/un.h>
39 #include <sys/syslimits.h>
40
41 #include <arpa/inet.h>
42 #include <netinet/in.h>
43 #include <netinet/tcp.h>
44
45 #include <assert.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <inttypes.h>
49 #include <poll.h>
50 #include <pthread.h>
51 #include <stdarg.h>
52 #include <stddef.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <unistd.h>
57
/*
 * Debug output.  Define DEBUG (e.g. by uncommenting the line below) to
 * have DPRINTF(("fmt", args...)) print to stderr; otherwise DPRINTF()
 * expands to nothing and its arguments are never evaluated.
 */
//#define DEBUG
#ifndef DEBUG
#define DPRINTF(x)
#else
#define DPRINTF(x) mydprintf x
/* printf-style helper behind DPRINTF(): formats onto stderr */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
}
#endif /* DEBUG */
73
/*
 * Unless the including file provides its own host operations (signalled
 * by defining HOSTOPS), map the host_* names straight to the libc calls.
 */
#if !defined(HOSTOPS)
#define host_poll	poll
#define host_read	read
#define host_sendto	sendto
#define host_setsockopt	setsockopt
#endif /* !HOSTOPS */
80
81 /*
82 * Bah, I hate writing on-off-wire conversions in C
83 */
84
/* frame class, stored in rsp_hdr.rsp_class */
enum {
	RUMPSP_REQ,
	RUMPSP_RESP,
	RUMPSP_ERROR
};

/* frame type (presumably stored in rsp_hdr.rsp_type -- confirm) */
enum {
	RUMPSP_HANDSHAKE,
	RUMPSP_SYSCALL,
	RUMPSP_COPYIN,
	RUMPSP_COPYINSTR,
	RUMPSP_COPYOUT,
	RUMPSP_COPYOUTSTR,
	RUMPSP_ANONMMAP,
	RUMPSP_PREFORK,
	RUMPSP_RAISE
};

/* handshake flavours (presumably stored in rsp_hdr.rsp_handshake) */
enum {
	HANDSHAKE_GUEST,
	HANDSHAKE_AUTH,
	HANDSHAKE_FORK
};

#define AUTHLEN 4 /* 128bit fork auth */
97
/*
 * Frame header.  Every frame starts with one of these; rsp_len counts
 * the header itself plus whatever payload follows it.
 */
struct rsp_hdr {
	uint64_t rsp_len;	/* total frame length, header included */
	uint64_t rsp_reqno;	/* request number, pairs responses w/ waiters */
	uint16_t rsp_class;	/* RUMPSP_REQ / RUMPSP_RESP / RUMPSP_ERROR */
	uint16_t rsp_type;
	/*
	 * The structure should be 64bit-aligned so that it can be
	 * typecast freely; the padding that would be needed anyway
	 * carries one per-frame 32bit value.
	 */
	union {
		uint32_t sysnum;
		uint32_t error;
		uint32_t handshake;
		uint32_t signo;
	} u;
};

/* size of the fixed header */
#define HDRSZ sizeof(struct rsp_hdr)

/* shorthands for the members of the trailing union */
#define rsp_sysnum	u.sysnum
#define rsp_error	u.error
#define rsp_handshake	u.handshake
#define rsp_signo	u.signo
119
/* upper bound for the banner (presumably in bytes -- confirm with users) */
#define MAXBANNER 96
121
122 /*
123 * Data follows the header. We have two types of structured data.
124 */
125
/*
 * copyin/copyout payload: a fixed part followed directly by rcp_len
 * bytes of data.  rcp_data is a C99 flexible array member (instead of
 * the non-standard zero-length array), so sizeof(struct rsp_copydata)
 * still covers only the fixed part.
 */
struct rsp_copydata {
	size_t rcp_len;		/* length of the data */
	void *rcp_addr;		/* address the data is copied from/to */
	uint8_t rcp_data[];	/* the data itself, variable length */
};
132
/*
 * syscall response payload: an error number plus two return value
 * slots.
 */
struct rsp_sysresp {
	int		rsys_error;
	register_t	rsys_retval[2];
};
138
/* payload used with the fork handshake */
struct handshake_fork {
	uint32_t	rf_auth[4];	/* 4 * 32bit = 128bit auth data */
	int		rf_cancel;
};
143
/*
 * One of these exists for every request that is waiting for its
 * response.  Entries are linked onto spclient's spc_respwait queue and
 * found again by request number when the response frame arrives.
 */
struct respwait {
	uint64_t	rw_reqno;	/* request number being waited for */
	void		*rw_data;	/* response payload, if any */
	size_t		rw_dlen;	/* length of rw_data */
	int		rw_done;	/* set once a response has arrived */
	int		rw_error;	/* error carried by an error frame */

	pthread_cond_t	rw_cv;		/* signalled when response arrives */

	TAILQ_ENTRY(respwait) rw_entries;
};
155
156 struct prefork;
157 struct spclient {
158 int spc_fd;
159 int spc_refcnt;
160 int spc_state;
161
162 pthread_mutex_t spc_mtx;
163 pthread_cond_t spc_cv;
164
165 struct lwp *spc_mainlwp;
166 pid_t spc_pid;
167
168 TAILQ_HEAD(, respwait) spc_respwait;
169
170 /* rest of the fields are zeroed upon disconnect */
171 #define SPC_ZEROFF offsetof(struct spclient, spc_pfd)
172 struct pollfd *spc_pfd;
173
174 struct rsp_hdr spc_hdr;
175 uint8_t *spc_buf;
176 size_t spc_off;
177
178 uint64_t spc_nextreq;
179 uint64_t spc_syscallreq;
180 uint64_t spc_generation;
181 int spc_ostatus, spc_istatus;
182 int spc_reconnecting;
183
184 LIST_HEAD(, prefork) spc_pflist;
185 };
186 #define SPCSTATUS_FREE 0
187 #define SPCSTATUS_BUSY 1
188 #define SPCSTATUS_WANTED 2
189
190 #define SPCSTATE_NEW 0
191 #define SPCSTATE_RUNNING 1
192 #define SPCSTATE_DYING 2
193
/*
 * Per-transport callbacks, see parsetab[]:
 *  addrparse_fn:   turn an address string into an allocated sockaddr
 *  connecthook_fn: called with the socket descriptor
 *  cleanup_fn:     clean up after a sockaddr (e.g. unlink a socket file)
 */
typedef int	(*addrparse_fn)(const char *, struct sockaddr **, int);
typedef int	(*connecthook_fn)(int);
typedef void	(*cleanup_fn)(struct sockaddr *);

/* forward declarations */
static int	readframe(struct spclient *);
static void	handlereq(struct spclient *);
200
201 static __inline void
202 spcresetbuf(struct spclient *spc)
203 {
204
205 spc->spc_buf = NULL;
206 spc->spc_off = 0;
207 }
208
209 static __inline void
210 spcfreebuf(struct spclient *spc)
211 {
212
213 free(spc->spc_buf);
214 spcresetbuf(spc);
215 }
216
217 static void
218 sendlockl(struct spclient *spc)
219 {
220
221 while (spc->spc_ostatus != SPCSTATUS_FREE) {
222 spc->spc_ostatus = SPCSTATUS_WANTED;
223 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
224 }
225 spc->spc_ostatus = SPCSTATUS_BUSY;
226 }
227
228 static void __unused
229 sendlock(struct spclient *spc)
230 {
231
232 pthread_mutex_lock(&spc->spc_mtx);
233 sendlockl(spc);
234 pthread_mutex_unlock(&spc->spc_mtx);
235 }
236
237 static void
238 sendunlockl(struct spclient *spc)
239 {
240
241 if (spc->spc_ostatus == SPCSTATUS_WANTED)
242 pthread_cond_broadcast(&spc->spc_cv);
243 spc->spc_ostatus = SPCSTATUS_FREE;
244 }
245
246 static void
247 sendunlock(struct spclient *spc)
248 {
249
250 pthread_mutex_lock(&spc->spc_mtx);
251 sendunlockl(spc);
252 pthread_mutex_unlock(&spc->spc_mtx);
253 }
254
255 static int
256 dosend(struct spclient *spc, const void *data, size_t dlen)
257 {
258 struct pollfd pfd;
259 const uint8_t *sdata = data;
260 ssize_t n;
261 size_t sent;
262 int fd = spc->spc_fd;
263
264 pfd.fd = fd;
265 pfd.events = POLLOUT;
266
267 for (sent = 0, n = 0; sent < dlen; ) {
268 if (n) {
269 if (host_poll(&pfd, 1, INFTIM) == -1) {
270 if (errno == EINTR)
271 continue;
272 return errno;
273 }
274 }
275
276 n = host_sendto(fd, sdata + sent, dlen - sent,
277 MSG_NOSIGNAL, NULL, 0);
278 if (n == -1) {
279 if (errno == EPIPE)
280 return ENOTCONN;
281 if (errno != EAGAIN)
282 return errno;
283 continue;
284 }
285 if (n == 0) {
286 return ENOTCONN;
287 }
288 sent += n;
289 }
290
291 return 0;
292 }
293
294 static void
295 doputwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
296 {
297
298 rw->rw_data = NULL;
299 rw->rw_dlen = rw->rw_done = rw->rw_error = 0;
300 pthread_cond_init(&rw->rw_cv, NULL);
301
302 pthread_mutex_lock(&spc->spc_mtx);
303 rw->rw_reqno = rhdr->rsp_reqno = spc->spc_nextreq++;
304 TAILQ_INSERT_TAIL(&spc->spc_respwait, rw, rw_entries);
305 }
306
307 static void __unused
308 putwait_locked(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
309 {
310
311 doputwait(spc, rw, rhdr);
312 pthread_mutex_unlock(&spc->spc_mtx);
313 }
314
315 static void
316 putwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
317 {
318
319 doputwait(spc, rw, rhdr);
320 sendlockl(spc);
321 pthread_mutex_unlock(&spc->spc_mtx);
322 }
323
324 static void
325 dounputwait(struct spclient *spc, struct respwait *rw)
326 {
327
328 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
329 pthread_mutex_unlock(&spc->spc_mtx);
330 pthread_cond_destroy(&rw->rw_cv);
331
332 }
333
334 static void __unused
335 unputwait_locked(struct spclient *spc, struct respwait *rw)
336 {
337
338 pthread_mutex_lock(&spc->spc_mtx);
339 dounputwait(spc, rw);
340 }
341
342 static void
343 unputwait(struct spclient *spc, struct respwait *rw)
344 {
345
346 pthread_mutex_lock(&spc->spc_mtx);
347 sendunlockl(spc);
348
349 dounputwait(spc, rw);
350 }
351
352 static void
353 kickwaiter(struct spclient *spc)
354 {
355 struct respwait *rw;
356 int error = 0;
357
358 pthread_mutex_lock(&spc->spc_mtx);
359 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) {
360 if (rw->rw_reqno == spc->spc_hdr.rsp_reqno)
361 break;
362 }
363 if (rw == NULL) {
364 DPRINTF(("no waiter found, invalid reqno %" PRIu64 "?\n",
365 spc->spc_hdr.rsp_reqno));
366 pthread_mutex_unlock(&spc->spc_mtx);
367 spcfreebuf(spc);
368 return;
369 }
370 DPRINTF(("rump_sp: client %p woke up waiter at %p\n", spc, rw));
371 rw->rw_data = spc->spc_buf;
372 rw->rw_done = 1;
373 rw->rw_dlen = (size_t)(spc->spc_off - HDRSZ);
374 if (spc->spc_hdr.rsp_class == RUMPSP_ERROR) {
375 error = rw->rw_error = spc->spc_hdr.rsp_error;
376 }
377 pthread_cond_signal(&rw->rw_cv);
378 pthread_mutex_unlock(&spc->spc_mtx);
379
380 if (error)
381 spcfreebuf(spc);
382 else
383 spcresetbuf(spc);
384 }
385
386 static void
387 kickall(struct spclient *spc)
388 {
389 struct respwait *rw;
390
391 /* DIAGASSERT(mutex_owned(spc_lock)) */
392 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries)
393 pthread_cond_broadcast(&rw->rw_cv);
394 }
395
396 static int
397 readframe(struct spclient *spc)
398 {
399 int fd = spc->spc_fd;
400 size_t left;
401 size_t framelen;
402 ssize_t n;
403
404 /* still reading header? */
405 if (spc->spc_off < HDRSZ) {
406 DPRINTF(("rump_sp: readframe getting header at offset %zu\n",
407 spc->spc_off));
408
409 left = HDRSZ - spc->spc_off;
410 /*LINTED: cast ok */
411 n = host_read(fd, (uint8_t*)&spc->spc_hdr + spc->spc_off, left);
412 if (n == 0) {
413 return -1;
414 }
415 if (n == -1) {
416 if (errno == EAGAIN)
417 return 0;
418 return -1;
419 }
420
421 spc->spc_off += n;
422 if (spc->spc_off < HDRSZ)
423 return -1;
424
425 /*LINTED*/
426 framelen = spc->spc_hdr.rsp_len;
427
428 if (framelen < HDRSZ) {
429 return -1;
430 } else if (framelen == HDRSZ) {
431 return 1;
432 }
433
434 spc->spc_buf = malloc(framelen - HDRSZ);
435 if (spc->spc_buf == NULL) {
436 return -1;
437 }
438 memset(spc->spc_buf, 0, framelen - HDRSZ);
439
440 /* "fallthrough" */
441 } else {
442 /*LINTED*/
443 framelen = spc->spc_hdr.rsp_len;
444 }
445
446 left = framelen - spc->spc_off;
447
448 DPRINTF(("rump_sp: readframe getting body at offset %zu, left %zu\n",
449 spc->spc_off, left));
450
451 if (left == 0)
452 return 1;
453 n = host_read(fd, spc->spc_buf + (spc->spc_off - HDRSZ), left);
454 if (n == 0) {
455 return -1;
456 }
457 if (n == -1) {
458 if (errno == EAGAIN)
459 return 0;
460 return -1;
461 }
462 spc->spc_off += n;
463 left -= n;
464
465 /* got everything? */
466 if (left == 0)
467 return 1;
468 else
469 return 0;
470 }
471
472 static int
473 tcp_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
474 {
475 struct sockaddr_in sin;
476 char buf[64];
477 const char *p;
478 size_t l;
479 int port;
480
481 memset(&sin, 0, sizeof(sin));
482 sin.sin_len = sizeof(sin);
483 sin.sin_family = AF_INET;
484
485 p = strchr(addr, ':');
486 if (!p) {
487 fprintf(stderr, "rump_sp_tcp: missing port specifier\n");
488 return EINVAL;
489 }
490
491 l = p - addr;
492 if (l > sizeof(buf)-1) {
493 fprintf(stderr, "rump_sp_tcp: address too long\n");
494 return EINVAL;
495 }
496 strncpy(buf, addr, l);
497 buf[l] = '\0';
498
499 /* special INADDR_ANY treatment */
500 if (strcmp(buf, "*") == 0 || strcmp(buf, "0") == 0) {
501 sin.sin_addr.s_addr = INADDR_ANY;
502 } else {
503 switch (inet_pton(AF_INET, buf, &sin.sin_addr)) {
504 case 1:
505 break;
506 case 0:
507 fprintf(stderr, "rump_sp_tcp: cannot parse %s\n", buf);
508 return EINVAL;
509 case -1:
510 fprintf(stderr, "rump_sp_tcp: inet_pton failed\n");
511 return errno;
512 default:
513 assert(/*CONSTCOND*/0);
514 return EINVAL;
515 }
516 }
517
518 if (!allow_wildcard && sin.sin_addr.s_addr == INADDR_ANY) {
519 fprintf(stderr, "rump_sp_tcp: client needs !INADDR_ANY\n");
520 return EINVAL;
521 }
522
523 /* advance to port number & parse */
524 p++;
525 l = strspn(p, "0123456789");
526 if (l == 0) {
527 fprintf(stderr, "rump_sp_tcp: port now found: %s\n", p);
528 return EINVAL;
529 }
530 strncpy(buf, p, l);
531 buf[l] = '\0';
532
533 if (*(p+l) != '/' && *(p+l) != '\0') {
534 fprintf(stderr, "rump_sp_tcp: junk at end of port: %s\n", addr);
535 return EINVAL;
536 }
537
538 port = atoi(buf);
539 if (port < 0 || port >= (1<<(8*sizeof(in_port_t)))) {
540 fprintf(stderr, "rump_sp_tcp: port %d out of range\n", port);
541 return ERANGE;
542 }
543 sin.sin_port = htons(port);
544
545 *sa = malloc(sizeof(sin));
546 if (*sa == NULL)
547 return errno;
548 memcpy(*sa, &sin, sizeof(sin));
549 return 0;
550 }
551
/*
 * Connect hook for tcp sockets: turn on TCP_NODELAY.  This is done on
 * a best-effort basis; a failing setsockopt is ignored and 0 is always
 * returned.
 */
static int
tcp_connecthook(int s)
{
	int on = 1;

	(void)host_setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	return 0;
}
562
563 static char parsedurl[256];
564
565 /*ARGSUSED*/
566 static int
567 unix_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
568 {
569 struct sockaddr_un sun;
570 size_t slen;
571 int savepath = 0;
572
573 if (strlen(addr) > sizeof(sun.sun_path))
574 return ENAMETOOLONG;
575
576 /*
577 * The pathname can be all kinds of spaghetti elementals,
578 * so meek and obidient we accept everything. However, use
579 * full path for easy cleanup in case someone gives a relative
580 * one and the server does a chdir() between now than the
581 * cleanup.
582 */
583 memset(&sun, 0, sizeof(sun));
584 sun.sun_family = AF_LOCAL;
585 if (*addr != '/') {
586 char mywd[PATH_MAX];
587
588 if (getcwd(mywd, sizeof(mywd)) == NULL) {
589 fprintf(stderr, "warning: cannot determine cwd, "
590 "omitting socket cleanup\n");
591 } else {
592 if (strlen(addr) + strlen(mywd) > sizeof(sun.sun_path))
593 return ENAMETOOLONG;
594 strlcpy(sun.sun_path, mywd, sizeof(sun.sun_path));
595 strlcat(sun.sun_path, "/", sizeof(sun.sun_path));
596 savepath = 1;
597 }
598 }
599 strlcat(sun.sun_path, addr, sizeof(sun.sun_path));
600 sun.sun_len = SUN_LEN(&sun);
601 slen = sun.sun_len+1; /* get the 0 too */
602
603 if (savepath && *parsedurl == '\0') {
604 snprintf(parsedurl, sizeof(parsedurl),
605 "unix://%s", sun.sun_path);
606 }
607
608 *sa = malloc(slen);
609 if (*sa == NULL)
610 return errno;
611 memcpy(*sa, &sun, slen);
612
613 return 0;
614 }
615
/*
 * Remove the socket file named by a unix domain sockaddr.  Only
 * absolute paths are cleaned up (see unix_parse()); anything else is
 * left alone.
 */
static void
unix_cleanup(struct sockaddr *sa)
{
	struct sockaddr_un *sun = (void *)sa;

	if (sun->sun_path[0] != '/')
		return;

	unlink(sun->sun_path);
}
628
/*
 * Placeholder for functionality which does not exist yet: complain on
 * stderr and fail with EOPNOTSUPP.
 */
/*ARGSUSED*/
static int
notsupp(void)
{
	int rv = EOPNOTSUPP;

	fprintf(stderr, "rump_sp: support not yet implemented\n");
	return rv;
}
637
/* Placeholder for hooks which have nothing to do: always succeeds. */
static int
success(void)
{
	const int ok = 0;

	return ok;
}
644
645 struct {
646 const char *id;
647 int domain;
648 addrparse_fn ap;
649 connecthook_fn connhook;
650 cleanup_fn cleanup;
651 } parsetab[] = {
652 { "tcp", PF_INET, tcp_parse, tcp_connecthook, (cleanup_fn)success },
653 { "unix", PF_LOCAL, unix_parse, (connecthook_fn)success, unix_cleanup },
654 { "tcp6", PF_INET6, (addrparse_fn)notsupp, (connecthook_fn)success,
655 (cleanup_fn)success },
656 };
657 #define NPARSE (sizeof(parsetab)/sizeof(parsetab[0]))
658
659 static int
660 parseurl(const char *url, struct sockaddr **sap, unsigned *idxp,
661 int allow_wildcard)
662 {
663 char id[16];
664 const char *p, *p2;
665 size_t l;
666 unsigned i;
667 int error;
668
669 /*
670 * Parse the url
671 */
672
673 p = url;
674 p2 = strstr(p, "://");
675 if (!p2) {
676 fprintf(stderr, "rump_sp: invalid locator ``%s''\n", p);
677 return EINVAL;
678 }
679 l = p2-p;
680 if (l > sizeof(id)-1) {
681 fprintf(stderr, "rump_sp: identifier too long in ``%s''\n", p);
682 return EINVAL;
683 }
684
685 strncpy(id, p, l);
686 id[l] = '\0';
687 p2 += 3; /* beginning of address */
688
689 for (i = 0; i < NPARSE; i++) {
690 if (strcmp(id, parsetab[i].id) == 0) {
691 error = parsetab[i].ap(p2, sap, allow_wildcard);
692 if (error)
693 return error;
694 break;
695 }
696 }
697 if (i == NPARSE) {
698 fprintf(stderr, "rump_sp: invalid identifier ``%s''\n", p);
699 return EINVAL;
700 }
701
702 *idxp = i;
703 return 0;
704 }
705