sp_common.c revision 1.24 1 /* $NetBSD: sp_common.c,v 1.24 2011/01/14 13:12:14 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Common client/server sysproxy routines. #included.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <sys/types.h>
35 #include <sys/mman.h>
36 #include <sys/queue.h>
37 #include <sys/socket.h>
38 #include <sys/un.h>
39 #include <sys/syslimits.h>
40
41 #include <arpa/inet.h>
42 #include <netinet/in.h>
43 #include <netinet/tcp.h>
44
45 #include <assert.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <inttypes.h>
49 #include <poll.h>
50 #include <pthread.h>
51 #include <stdarg.h>
52 #include <stddef.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <unistd.h>
57
58 //#define DEBUG
#if defined(DEBUG)
/*
 * Debug builds: DPRINTF(("fmt", args...)) hands its doubly parenthesized
 * argument list to mydprintf(), which formats it onto stderr.
 */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}
#define DPRINTF(x) mydprintf x
#else
/* Non-debug builds: DPRINTF() expands to nothing, arguments are dropped. */
#define DPRINTF(x)
#endif
73
/*
 * Unless the including file provides its own host operations (signalled
 * by defining HOSTOPS), the host_*() names map directly onto libc.
 */
#if !defined(HOSTOPS)
#define host_poll	poll
#define host_read	read
#define host_sendto	sendto
#define host_setsockopt	setsockopt
#endif /* !HOSTOPS */
80
81 /*
82 * Bah, I hate writing on-off-wire conversions in C
83 */
84
/* Frame class; the receive path compares rsp_class against RUMPSP_ERROR. */
enum {
	RUMPSP_REQ	= 0,
	RUMPSP_RESP	= 1,
	RUMPSP_ERROR	= 2
};

/* Frame type (presumably the value carried in rsp_type -- confirm). */
enum {
	RUMPSP_HANDSHAKE	= 0,
	RUMPSP_SYSCALL		= 1,
	RUMPSP_COPYIN		= 2,
	RUMPSP_COPYINSTR	= 3,
	RUMPSP_COPYOUT		= 4,
	RUMPSP_COPYOUTSTR	= 5,
	RUMPSP_ANONMMAP		= 6,
	RUMPSP_PREFORK		= 7,
	RUMPSP_RAISE		= 8
};

/* Handshake flavour (presumably the value carried in rsp_handshake). */
enum {
	HANDSHAKE_GUEST	= 0,
	HANDSHAKE_AUTH	= 1,
	HANDSHAKE_FORK	= 2
};

#define AUTHLEN 4 /* 128bit fork auth: 4 words of 32 bits */
97
/*
 * Header found at the start of every frame.  The receive path reads
 * exactly HDRSZ bytes of it before looking at rsp_len.
 */
struct rsp_hdr {
	uint64_t rsp_len;	/* length of the whole frame, header included */
	uint64_t rsp_reqno;	/* request number; matches a reply to its waiter */
	uint16_t rsp_class;	/* RUMPSP_ERROR marks an error reply */
	uint16_t rsp_type;

	/*
	 * Padding would be needed here anyway to keep the structure
	 * 64bit-aligned for typecasting, so the 32 bits carry a
	 * per-frame value instead.
	 */
	union {
		uint32_t sysnum;
		uint32_t error;
		uint32_t handshake;
		uint32_t signo;
	} u;
};

/* size of the fixed header */
#define HDRSZ		sizeof(struct rsp_hdr)

/* shorthands for the members of the union */
#define rsp_sysnum	u.sysnum
#define rsp_error	u.error
#define rsp_handshake	u.handshake
#define rsp_signo	u.signo
119
/* presumably the size limit, in bytes, of the greeting banner -- confirm */
#define MAXBANNER	96
121
122 /*
123 * Data follows the header. We have two types of structured data.
124 */
125
/*
 * copyin/copyout: two fixed fields followed by a variable amount of data.
 *
 * rcp_data is a C99 flexible array member, so sizeof(struct rsp_copydata)
 * covers the fixed part only; storage for the trailing bytes has to be
 * added by whoever allocates the structure.
 */
struct rsp_copydata {
	size_t rcp_len;		/* presumably byte count of the copy -- confirm with users */
	void *rcp_addr;		/* presumably address in the peer the copy refers to */
	uint8_t rcp_data[];	/* trailing data bytes */
};
132
/*
 * syscall response: an error number plus a pair of register-sized
 * return values.
 */
struct rsp_sysresp {
	int rsys_error;			/* error number of the call */
	register_t rsys_retval[2];	/* return value pair */
};
138
/* Data of a fork handshake: four 32-bit auth words and a cancel flag. */
struct handshake_fork {
	uint32_t rf_auth[4];	/* 128 bits of fork authentication */
	int rf_cancel;
};
143
/*
 * One thread waiting for the reply to a request it has sent.
 * Queued on spc_respwait by putwait() and completed by kickwaiter().
 */
struct respwait {
	uint64_t rw_reqno;	/* request number the reply must carry */
	void *rw_data;		/* body of the reply, handed over by kickwaiter() */
	size_t rw_dlen;		/* length of that body */
	int rw_done;		/* set to 1 once the reply has arrived */
	int rw_error;		/* rsp_error of the reply if it was an error frame */

	pthread_cond_t rw_cv;	/* signalled by kickwaiter()/kickall() */

	TAILQ_ENTRY(respwait) rw_entries;	/* linkage on spc_respwait */
};
155
156 struct prefork;
157 struct spclient {
158 int spc_fd;
159 int spc_refcnt;
160 int spc_state;
161
162 pthread_mutex_t spc_mtx;
163 pthread_cond_t spc_cv;
164
165 struct lwp *spc_mainlwp;
166 pid_t spc_pid;
167
168 TAILQ_HEAD(, respwait) spc_respwait;
169
170 /* rest of the fields are zeroed upon disconnect */
171 #define SPC_ZEROFF offsetof(struct spclient, spc_pfd)
172 struct pollfd *spc_pfd;
173
174 struct rsp_hdr spc_hdr;
175 uint8_t *spc_buf;
176 size_t spc_off;
177
178 uint64_t spc_nextreq;
179 int spc_ostatus, spc_istatus;
180
181 LIST_HEAD(, prefork) spc_pflist;
182 };
/* values of spc_ostatus as used by the send lock */
#define SPCSTATUS_FREE		0	/* nobody is sending */
#define SPCSTATUS_BUSY		1	/* send lock taken */
#define SPCSTATUS_WANTED	2	/* taken, and somebody waits for it */

/* connection life cycle (presumably the values of spc_state -- confirm) */
#define SPCSTATE_NEW		0
#define SPCSTATE_RUNNING	1
#define SPCSTATE_DYING		2
190
/* per-transport callbacks referenced from parsetab[] */
typedef int (*addrparse_fn)(const char *addr, struct sockaddr **sap,
	int allow_wildcard);			/* address string -> sockaddr */
typedef int (*connecthook_fn)(int s);		/* takes a socket descriptor */
typedef void (*cleanup_fn)(struct sockaddr *sa);	/* per-address cleanup */
194
/* forward declarations */
static int readframe(struct spclient *spc);
static void handlereq(struct spclient *spc);
197
198 static __inline void
199 spcresetbuf(struct spclient *spc)
200 {
201
202 spc->spc_buf = NULL;
203 spc->spc_off = 0;
204 }
205
206 static __inline void
207 spcfreebuf(struct spclient *spc)
208 {
209
210 free(spc->spc_buf);
211 spcresetbuf(spc);
212 }
213
214 static void
215 sendlockl(struct spclient *spc)
216 {
217
218 while (spc->spc_ostatus != SPCSTATUS_FREE) {
219 spc->spc_ostatus = SPCSTATUS_WANTED;
220 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
221 }
222 spc->spc_ostatus = SPCSTATUS_BUSY;
223 }
224
225 static void
226 sendlock(struct spclient *spc)
227 {
228
229 pthread_mutex_lock(&spc->spc_mtx);
230 sendlockl(spc);
231 pthread_mutex_unlock(&spc->spc_mtx);
232 }
233
234 static void
235 sendunlockl(struct spclient *spc)
236 {
237
238 if (spc->spc_ostatus == SPCSTATUS_WANTED)
239 pthread_cond_broadcast(&spc->spc_cv);
240 spc->spc_ostatus = SPCSTATUS_FREE;
241 }
242
243 static void
244 sendunlock(struct spclient *spc)
245 {
246
247 pthread_mutex_lock(&spc->spc_mtx);
248 sendunlockl(spc);
249 pthread_mutex_unlock(&spc->spc_mtx);
250 }
251
252 static int
253 dosend(struct spclient *spc, const void *data, size_t dlen)
254 {
255 struct pollfd pfd;
256 const uint8_t *sdata = data;
257 ssize_t n;
258 size_t sent;
259 int fd = spc->spc_fd;
260
261 pfd.fd = fd;
262 pfd.events = POLLOUT;
263
264 for (sent = 0, n = 0; sent < dlen; ) {
265 if (n) {
266 if (host_poll(&pfd, 1, INFTIM) == -1) {
267 if (errno == EINTR)
268 continue;
269 return errno;
270 }
271 }
272
273 n = host_sendto(fd, sdata + sent, dlen - sent,
274 MSG_NOSIGNAL, NULL, 0);
275 if (n == 0) {
276 return ENOTCONN;
277 }
278 if (n == -1) {
279 if (errno != EAGAIN)
280 return errno;
281 continue;
282 }
283 sent += n;
284 }
285
286 return 0;
287 }
288
289 static void
290 putwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
291 {
292
293 rw->rw_data = NULL;
294 rw->rw_dlen = rw->rw_done = rw->rw_error = 0;
295 pthread_cond_init(&rw->rw_cv, NULL);
296
297 pthread_mutex_lock(&spc->spc_mtx);
298 rw->rw_reqno = rhdr->rsp_reqno = spc->spc_nextreq++;
299 TAILQ_INSERT_TAIL(&spc->spc_respwait, rw, rw_entries);
300
301 sendlockl(spc);
302 pthread_mutex_unlock(&spc->spc_mtx);
303 }
304
305 static void
306 unputwait(struct spclient *spc, struct respwait *rw)
307 {
308
309 pthread_mutex_lock(&spc->spc_mtx);
310 sendunlockl(spc);
311
312 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
313 pthread_mutex_unlock(&spc->spc_mtx);
314 pthread_cond_destroy(&rw->rw_cv);
315 }
316
317 static void
318 kickwaiter(struct spclient *spc)
319 {
320 struct respwait *rw;
321 int error = 0;
322
323 pthread_mutex_lock(&spc->spc_mtx);
324 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) {
325 if (rw->rw_reqno == spc->spc_hdr.rsp_reqno)
326 break;
327 }
328 if (rw == NULL) {
329 DPRINTF(("no waiter found, invalid reqno %" PRIu64 "?\n",
330 spc->spc_hdr.rsp_reqno));
331 pthread_mutex_unlock(&spc->spc_mtx);
332 spcfreebuf(spc);
333 return;
334 }
335 DPRINTF(("rump_sp: client %p woke up waiter at %p\n", spc, rw));
336 rw->rw_data = spc->spc_buf;
337 rw->rw_done = 1;
338 rw->rw_dlen = (size_t)(spc->spc_off - HDRSZ);
339 if (spc->spc_hdr.rsp_class == RUMPSP_ERROR) {
340 error = rw->rw_error = spc->spc_hdr.rsp_error;
341 }
342 pthread_cond_signal(&rw->rw_cv);
343 pthread_mutex_unlock(&spc->spc_mtx);
344
345 if (error)
346 spcfreebuf(spc);
347 else
348 spcresetbuf(spc);
349 }
350
351 static void
352 kickall(struct spclient *spc)
353 {
354 struct respwait *rw;
355
356 /* DIAGASSERT(mutex_owned(spc_lock)) */
357 TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries)
358 pthread_cond_broadcast(&rw->rw_cv);
359 }
360
361 static int
362 readframe(struct spclient *spc)
363 {
364 int fd = spc->spc_fd;
365 size_t left;
366 size_t framelen;
367 ssize_t n;
368
369 /* still reading header? */
370 if (spc->spc_off < HDRSZ) {
371 DPRINTF(("rump_sp: readframe getting header at offset %zu\n",
372 spc->spc_off));
373
374 left = HDRSZ - spc->spc_off;
375 /*LINTED: cast ok */
376 n = host_read(fd, (uint8_t*)&spc->spc_hdr + spc->spc_off, left);
377 if (n == 0) {
378 return -1;
379 }
380 if (n == -1) {
381 if (errno == EAGAIN)
382 return 0;
383 return -1;
384 }
385
386 spc->spc_off += n;
387 if (spc->spc_off < HDRSZ)
388 return -1;
389
390 /*LINTED*/
391 framelen = spc->spc_hdr.rsp_len;
392
393 if (framelen < HDRSZ) {
394 return -1;
395 } else if (framelen == HDRSZ) {
396 return 1;
397 }
398
399 spc->spc_buf = malloc(framelen - HDRSZ);
400 if (spc->spc_buf == NULL) {
401 return -1;
402 }
403 memset(spc->spc_buf, 0, framelen - HDRSZ);
404
405 /* "fallthrough" */
406 } else {
407 /*LINTED*/
408 framelen = spc->spc_hdr.rsp_len;
409 }
410
411 left = framelen - spc->spc_off;
412
413 DPRINTF(("rump_sp: readframe getting body at offset %zu, left %zu\n",
414 spc->spc_off, left));
415
416 if (left == 0)
417 return 1;
418 n = host_read(fd, spc->spc_buf + (spc->spc_off - HDRSZ), left);
419 if (n == 0) {
420 return -1;
421 }
422 if (n == -1) {
423 if (errno == EAGAIN)
424 return 0;
425 return -1;
426 }
427 spc->spc_off += n;
428 left -= n;
429
430 /* got everything? */
431 if (left == 0)
432 return 1;
433 else
434 return 0;
435 }
436
437 static int
438 tcp_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
439 {
440 struct sockaddr_in sin;
441 char buf[64];
442 const char *p;
443 size_t l;
444 int port;
445
446 memset(&sin, 0, sizeof(sin));
447 sin.sin_len = sizeof(sin);
448 sin.sin_family = AF_INET;
449
450 p = strchr(addr, ':');
451 if (!p) {
452 fprintf(stderr, "rump_sp_tcp: missing port specifier\n");
453 return EINVAL;
454 }
455
456 l = p - addr;
457 if (l > sizeof(buf)-1) {
458 fprintf(stderr, "rump_sp_tcp: address too long\n");
459 return EINVAL;
460 }
461 strncpy(buf, addr, l);
462 buf[l] = '\0';
463
464 /* special INADDR_ANY treatment */
465 if (strcmp(buf, "*") == 0 || strcmp(buf, "0") == 0) {
466 sin.sin_addr.s_addr = INADDR_ANY;
467 } else {
468 switch (inet_pton(AF_INET, buf, &sin.sin_addr)) {
469 case 1:
470 break;
471 case 0:
472 fprintf(stderr, "rump_sp_tcp: cannot parse %s\n", buf);
473 return EINVAL;
474 case -1:
475 fprintf(stderr, "rump_sp_tcp: inet_pton failed\n");
476 return errno;
477 default:
478 assert(/*CONSTCOND*/0);
479 return EINVAL;
480 }
481 }
482
483 if (!allow_wildcard && sin.sin_addr.s_addr == INADDR_ANY) {
484 fprintf(stderr, "rump_sp_tcp: client needs !INADDR_ANY\n");
485 return EINVAL;
486 }
487
488 /* advance to port number & parse */
489 p++;
490 l = strspn(p, "0123456789");
491 if (l == 0) {
492 fprintf(stderr, "rump_sp_tcp: port now found: %s\n", p);
493 return EINVAL;
494 }
495 strncpy(buf, p, l);
496 buf[l] = '\0';
497
498 if (*(p+l) != '/' && *(p+l) != '\0') {
499 fprintf(stderr, "rump_sp_tcp: junk at end of port: %s\n", addr);
500 return EINVAL;
501 }
502
503 port = atoi(buf);
504 if (port < 0 || port >= (1<<(8*sizeof(in_port_t)))) {
505 fprintf(stderr, "rump_sp_tcp: port %d out of range\n", port);
506 return ERANGE;
507 }
508 sin.sin_port = htons(port);
509
510 *sa = malloc(sizeof(sin));
511 if (*sa == NULL)
512 return errno;
513 memcpy(*sa, &sin, sizeof(sin));
514 return 0;
515 }
516
/*
 * Connect hook for TCP: asks for TCP_NODELAY on socket s.
 * Best effort -- the result of setsockopt is not examined and the
 * hook always returns 0.
 */
static int
tcp_connecthook(int s)
{
	int nodelay = 1;

	host_setsockopt(s, IPPROTO_TCP, TCP_NODELAY,
	    &nodelay, sizeof(nodelay));

	return 0;
}
527
528 /*ARGSUSED*/
529 static int
530 unix_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
531 {
532 struct sockaddr_un sun;
533 size_t slen;
534
535 if (strlen(addr) > sizeof(sun.sun_path))
536 return ENAMETOOLONG;
537
538 /*
539 * The pathname can be all kinds of spaghetti elementals,
540 * so meek and obidient we accept everything. However, use
541 * full path for easy cleanup in case someone gives a relative
542 * one and the server does a chdir() between now than the
543 * cleanup.
544 */
545 memset(&sun, 0, sizeof(sun));
546 sun.sun_family = AF_LOCAL;
547 if (*addr != '/') {
548 char mywd[PATH_MAX];
549
550 if (getcwd(mywd, sizeof(mywd)) == NULL) {
551 fprintf(stderr, "warning: cannot determine cwd, "
552 "omitting socket cleanup\n");
553 } else {
554 if (strlen(addr) + strlen(mywd) > sizeof(sun.sun_path))
555 return ENAMETOOLONG;
556 strlcpy(sun.sun_path, mywd, sizeof(sun.sun_path));
557 strlcat(sun.sun_path, "/", sizeof(sun.sun_path));
558 }
559 }
560 strlcat(sun.sun_path, addr, sizeof(sun.sun_path));
561 sun.sun_len = SUN_LEN(&sun);
562 slen = sun.sun_len+1; /* get the 0 too */
563
564 *sa = malloc(slen);
565 if (*sa == NULL)
566 return errno;
567 memcpy(*sa, &sun, slen);
568
569 return 0;
570 }
571
/*
 * Remove the socket file named by a local-domain address.
 * Only absolute paths are removed; see unix_parse() for why a
 * relative one may be present.
 */
static void
unix_cleanup(struct sockaddr *sa)
{
	struct sockaddr_un *sun = (void *)sa;

	if (sun->sun_path[0] != '/')
		return;

	unlink(sun->sun_path);
}
584
/*
 * Table stub for something that is not implemented: complains on
 * stderr and returns EOPNOTSUPP.
 */
/*ARGSUSED*/
static int
notsupp(void)
{
	static const char msg[] = "rump_sp: support not yet implemented\n";

	fputs(msg, stderr);
	return EOPNOTSUPP;
}
593
/* Table stub for a hook with nothing to do: always returns 0. */
static int
success(void)
{
	const int ok = 0;

	return ok;
}
600
601 struct {
602 const char *id;
603 int domain;
604 addrparse_fn ap;
605 connecthook_fn connhook;
606 cleanup_fn cleanup;
607 } parsetab[] = {
608 { "tcp", PF_INET, tcp_parse, tcp_connecthook, (cleanup_fn)success },
609 { "unix", PF_LOCAL, unix_parse, (connecthook_fn)success, unix_cleanup },
610 { "tcp6", PF_INET6, (addrparse_fn)notsupp, (connecthook_fn)success,
611 (cleanup_fn)success },
612 };
613 #define NPARSE (sizeof(parsetab)/sizeof(parsetab[0]))
614
615 static int
616 parseurl(const char *url, struct sockaddr **sap, unsigned *idxp,
617 int allow_wildcard)
618 {
619 char id[16];
620 const char *p, *p2;
621 size_t l;
622 unsigned i;
623 int error;
624
625 /*
626 * Parse the url
627 */
628
629 p = url;
630 p2 = strstr(p, "://");
631 if (!p2) {
632 fprintf(stderr, "rump_sp: invalid locator ``%s''\n", p);
633 return EINVAL;
634 }
635 l = p2-p;
636 if (l > sizeof(id)-1) {
637 fprintf(stderr, "rump_sp: identifier too long in ``%s''\n", p);
638 return EINVAL;
639 }
640
641 strncpy(id, p, l);
642 id[l] = '\0';
643 p2 += 3; /* beginning of address */
644
645 for (i = 0; i < NPARSE; i++) {
646 if (strcmp(id, parsetab[i].id) == 0) {
647 error = parsetab[i].ap(p2, sap, allow_wildcard);
648 if (error)
649 return error;
650 break;
651 }
652 }
653 if (i == NPARSE) {
654 fprintf(stderr, "rump_sp: invalid identifier ``%s''\n", p);
655 return EINVAL;
656 }
657
658 *idxp = i;
659 return 0;
660 }
661