/*	$NetBSD: rumpuser_sp.c,v 1.19 2010/11/27 18:30:51 pooka Exp $	*/
2
3 /*
4 * Copyright (c) 2010 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
/*
 * Sysproxy routines.  This provides system RPC support over host sockets.
 * The most notable limitation is that the client and server must share
 * the same ABI.  This does not mean that they have to run on the same
 * machine or that they need to run the same version of the host OS,
 * just that they must agree on the data structures.  This even *might*
 * work correctly from one hardware architecture to another.
 *
 * Not finished yet, i.e. don't use in production.  Lacks locking plus
 * handling of multiple clients and unexpected connection closes.
 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: rumpuser_sp.c,v 1.19 2010/11/27 18:30:51 pooka Exp $");
42
43 #include <sys/types.h>
44 #include <sys/atomic.h>
45 #include <sys/mman.h>
46 #include <sys/socket.h>
47
48 #include <arpa/inet.h>
49 #include <netinet/in.h>
50 #include <netinet/tcp.h>
51
52 #include <assert.h>
53 #include <errno.h>
54 #include <fcntl.h>
55 #include <poll.h>
56 #include <pthread.h>
57 #include <stdarg.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62
63 #include <rump/rumpuser.h>
64 #include "rumpuser_int.h"
65
66 #include "sp_common.c"
67
68 #define MAXCLI 256
69
70 static struct pollfd pfdlist[MAXCLI];
71 static struct spclient spclist[MAXCLI];
72 static unsigned int disco;
73
74 static struct rumpuser_sp_ops spops;
75
/*
 * Manual wrappers, since librump does not have access to the
 * user namespace wrapped interfaces.
 */
80
81 static void
82 lwproc_switch(struct lwp *l)
83 {
84
85 spops.spop_schedule();
86 spops.spop_lwproc_switch(l);
87 spops.spop_unschedule();
88 }
89
90 static void
91 lwproc_release(void)
92 {
93
94 spops.spop_schedule();
95 spops.spop_lwproc_release();
96 spops.spop_unschedule();
97 }
98
99 static int
100 lwproc_newproc(struct spclient *spc)
101 {
102 int rv;
103
104 spops.spop_schedule();
105 rv = spops.spop_lwproc_newproc(spc);
106 spops.spop_unschedule();
107
108 return rv;
109 }
110
111 static int
112 lwproc_newlwp(pid_t pid)
113 {
114 int rv;
115
116 spops.spop_schedule();
117 rv = spops.spop_lwproc_newlwp(pid);
118 spops.spop_unschedule();
119
120 return rv;
121 }
122
123 static struct lwp *
124 lwproc_curlwp(void)
125 {
126 struct lwp *l;
127
128 spops.spop_schedule();
129 l = spops.spop_lwproc_curlwp();
130 spops.spop_unschedule();
131
132 return l;
133 }
134
135 static pid_t
136 lwproc_getpid(void)
137 {
138 pid_t p;
139
140 spops.spop_schedule();
141 p = spops.spop_getpid();
142 spops.spop_unschedule();
143
144 return p;
145 }
146
147 static int
148 rumpsyscall(int sysnum, void *data, register_t *retval)
149 {
150 int rv;
151
152 spops.spop_schedule();
153 rv = spops.spop_syscall(sysnum, data, retval);
154 spops.spop_unschedule();
155
156 return rv;
157 }
158
159 static uint64_t
160 nextreq(struct spclient *spc)
161 {
162 uint64_t nw;
163
164 pthread_mutex_lock(&spc->spc_mtx);
165 nw = spc->spc_nextreq++;
166 pthread_mutex_unlock(&spc->spc_mtx);
167
168 return nw;
169 }
170
171 static int
172 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
173 register_t *retval)
174 {
175 struct rsp_hdr rhdr;
176 struct rsp_sysresp sysresp;
177 int rv;
178
179 rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
180 rhdr.rsp_reqno = reqno;
181 rhdr.rsp_class = RUMPSP_RESP;
182 rhdr.rsp_type = RUMPSP_SYSCALL;
183 rhdr.rsp_sysnum = 0;
184
185 sysresp.rsys_error = error;
186 memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));
187
188 sendlock(spc);
189 rv = dosend(spc, &rhdr, sizeof(rhdr));
190 rv = dosend(spc, &sysresp, sizeof(sysresp));
191 sendunlock(spc);
192
193 return rv;
194 }
195
196 static int
197 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
198 int wantstr, void **resp)
199 {
200 struct rsp_hdr rhdr;
201 struct rsp_copydata copydata;
202 struct respwait rw;
203 int rv;
204
205 DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));
206
207 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
208 rhdr.rsp_class = RUMPSP_REQ;
209 if (wantstr)
210 rhdr.rsp_type = RUMPSP_COPYINSTR;
211 else
212 rhdr.rsp_type = RUMPSP_COPYIN;
213 rhdr.rsp_sysnum = 0;
214
215 copydata.rcp_addr = __UNCONST(remaddr);
216 copydata.rcp_len = *dlen;
217
218 putwait(spc, &rw, &rhdr);
219 rv = dosend(spc, &rhdr, sizeof(rhdr));
220 rv = dosend(spc, ©data, sizeof(copydata));
221 if (rv) {
222 unputwait(spc, &rw);
223 return rv;
224 }
225
226 rv = waitresp(spc, &rw);
227
228 DPRINTF(("copyin: response %d\n", rv));
229
230 *resp = rw.rw_data;
231 if (wantstr)
232 *dlen = rw.rw_dlen;
233
234 return rv;
235
236 }
237
238 static int
239 send_copyout_req(struct spclient *spc, const void *remaddr,
240 const void *data, size_t dlen)
241 {
242 struct rsp_hdr rhdr;
243 struct rsp_copydata copydata;
244 int rv;
245
246 DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));
247
248 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
249 rhdr.rsp_reqno = nextreq(spc);
250 rhdr.rsp_class = RUMPSP_REQ;
251 rhdr.rsp_type = RUMPSP_COPYOUT;
252 rhdr.rsp_sysnum = 0;
253
254 copydata.rcp_addr = __UNCONST(remaddr);
255 copydata.rcp_len = dlen;
256
257 sendlock(spc);
258 rv = dosend(spc, &rhdr, sizeof(rhdr));
259 rv = dosend(spc, ©data, sizeof(copydata));
260 rv = dosend(spc, data, dlen);
261 sendunlock(spc);
262
263 return rv;
264 }
265
266 static int
267 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
268 {
269 struct rsp_hdr rhdr;
270 struct respwait rw;
271 int rv;
272
273 DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));
274
275 rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
276 rhdr.rsp_class = RUMPSP_REQ;
277 rhdr.rsp_type = RUMPSP_ANONMMAP;
278 rhdr.rsp_sysnum = 0;
279
280 putwait(spc, &rw, &rhdr);
281 rv = dosend(spc, &rhdr, sizeof(rhdr));
282 rv = dosend(spc, &howmuch, sizeof(howmuch));
283 if (rv) {
284 unputwait(spc, &rw);
285 return rv;
286 }
287
288 rv = waitresp(spc, &rw);
289
290 *resp = rw.rw_data;
291
292 DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));
293
294 return rv;
295 }
296
297 static void
298 spcref(struct spclient *spc)
299 {
300
301 pthread_mutex_lock(&spc->spc_mtx);
302 spc->spc_refcnt++;
303 pthread_mutex_unlock(&spc->spc_mtx);
304 }
305
306 static void
307 spcrelease(struct spclient *spc)
308 {
309 int ref;
310
311 pthread_mutex_lock(&spc->spc_mtx);
312 ref = --spc->spc_refcnt;
313 pthread_mutex_unlock(&spc->spc_mtx);
314
315 if (ref > 0)
316 return;
317
318 DPRINTF(("spcrelease: spc %p fd %d\n", spc, spc->spc_fd));
319
320 _DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
321 _DIAGASSERT(spc->spc_buf == NULL);
322
323 lwproc_switch(spc->spc_mainlwp);
324 lwproc_release();
325 spc->spc_mainlwp = NULL;
326
327 close(spc->spc_fd);
328 spc->spc_fd = -1;
329 spc->spc_dying = 0;
330
331 atomic_inc_uint(&disco);
332 }
333
334 static void
335 serv_handledisco(unsigned int idx)
336 {
337 struct spclient *spc = &spclist[idx];
338
339 DPRINTF(("rump_sp: disconnecting [%u]\n", idx));
340
341 pfdlist[idx].fd = -1;
342 pfdlist[idx].revents = 0;
343 pthread_mutex_lock(&spc->spc_mtx);
344 spc->spc_dying = 1;
345 kickall(spc);
346 pthread_mutex_unlock(&spc->spc_mtx);
347
348 /*
349 * Nobody's going to attempt to send/receive anymore,
350 * so reinit info relevant to that.
351 */
352 memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);
353
354 spcrelease(spc);
355 }
356
357 static unsigned
358 serv_handleconn(int fd, connecthook_fn connhook, int busy)
359 {
360 struct sockaddr_storage ss;
361 socklen_t sl = sizeof(ss);
362 int newfd, flags;
363 unsigned i;
364
365 /*LINTED: cast ok */
366 newfd = accept(fd, (struct sockaddr *)&ss, &sl);
367 if (newfd == -1)
368 return 0;
369
370 if (busy) {
371 close(newfd); /* EBUSY */
372 return 0;
373 }
374
375 /* XXX: should do some sort of handshake too */
376
377 flags = fcntl(newfd, F_GETFL, 0);
378 if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
379 close(newfd);
380 return 0;
381 }
382
383 if (connhook(newfd) != 0) {
384 close(newfd);
385 return 0;
386 }
387
388 /* find empty slot the simple way */
389 for (i = 0; i < MAXCLI; i++) {
390 if (pfdlist[i].fd == -1 && spclist[i].spc_dying == 0)
391 break;
392 }
393
394 if (lwproc_newproc(&spclist[i]) != 0) {
395 close(newfd);
396 return 0;
397 }
398
399 assert(i < MAXCLI);
400
401 pfdlist[i].fd = newfd;
402 spclist[i].spc_fd = newfd;
403 spclist[i].spc_mainlwp = lwproc_curlwp();
404 spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
405 spclist[i].spc_pid = lwproc_getpid();
406 spclist[i].spc_refcnt = 1;
407
408 TAILQ_INIT(&spclist[i].spc_respwait);
409
410 DPRINTF(("rump_sp: added new connection fd %d at idx %u, pid %d\n",
411 newfd, i, lwproc_getpid()));
412
413 lwproc_switch(NULL);
414
415 return i;
416 }
417
418 static void
419 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
420 {
421 register_t retval[2] = {0, 0};
422 int rv, sysnum;
423
424 sysnum = (int)rhdr->rsp_sysnum;
425 DPRINTF(("rump_sp: handling syscall %d from client %d\n",
426 sysnum, 0));
427
428 lwproc_newlwp(spc->spc_pid);
429 rv = rumpsyscall(sysnum, data, retval);
430 lwproc_release();
431
432 DPRINTF(("rump_sp: got return value %d & %d/%d\n",
433 rv, retval[0], retval[1]));
434
435 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
436 }
437
438 struct sysbouncearg {
439 struct spclient *sba_spc;
440 struct rsp_hdr sba_hdr;
441 uint8_t *sba_data;
442 };
443 static void *
444 serv_syscallbouncer(void *arg)
445 {
446 struct sysbouncearg *barg = arg;
447
448 serv_handlesyscall(barg->sba_spc, &barg->sba_hdr, barg->sba_data);
449 spcrelease(barg->sba_spc);
450 free(barg->sba_data);
451 free(barg);
452 return NULL;
453 }
454
/*
 * Copy data from the client's address space into a local buffer.
 *
 * arg:     the client (struct spclient *)
 * raddr:   source address in the client
 * laddr:   local destination buffer
 * len:     in: capacity of `laddr' / number of bytes wanted;
 *          out (string copies only): length reported for the response
 * wantstr: non-zero requests a string copy
 *
 * rumpuser__kunlock() is called before talking to the client and
 * rumpuser__klock() afterwards.
 *
 * Returns 0 on success and EFAULT on any failure.
 *
 * The length used for the copy can come from the client (string case),
 * so it is checked against the capacity the caller passed in; a larger
 * value is rejected instead of overrunning `laddr'.  A missing payload
 * for a non-empty copy is rejected as well.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	size_t maxlen = *len;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	if (*len > maxlen || (*len > 0 && rdata == NULL)) {
		free(rdata);
		rv = EFAULT;
		goto out;
	}

	if (*len > 0)
		memcpy(laddr, rdata, *len);
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}
477
/*
 * Copy `len' bytes from client address `raddr' to local address `laddr'.
 * Thin front end for sp_copyin() with the string flag cleared; returns
 * its result.
 */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{
	size_t nbytes = len;

	return sp_copyin(arg, raddr, laddr, &nbytes, 0);
}
484
/*
 * Copy a string from client address `raddr' to local address `laddr'.
 * Thin front end for sp_copyin() with the string flag set; `len' is
 * passed through as is and the result of sp_copyin() is returned.
 */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{
	const int wantstr = 1;

	return sp_copyin(arg, raddr, laddr, len, wantstr);
}
491
/*
 * Send `dlen' bytes from local address `laddr' to the client, destined
 * for client address `raddr'.
 *
 * rumpuser__kunlock() is called before the request is sent and
 * rumpuser__klock() afterwards.
 *
 * Returns 0 on success and EFAULT if sending the request failed.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);
	error = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	return error ? EFAULT : 0;
}
506
/*
 * Copy `dlen' bytes from local address `laddr' to client address
 * `raddr'.  Forwards to sp_copyout() and returns its result.
 */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	int error;

	error = sp_copyout(arg, laddr, raddr, dlen);
	return error;
}
513
/*
 * Copy `*dlen' bytes from local address `laddr' to client address
 * `raddr'.  Forwards to sp_copyout() and returns its result; `*dlen'
 * is only read.
 */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{
	size_t nbytes = *dlen;

	return sp_copyout(arg, laddr, raddr, nbytes);
}
520
/*
 * Ask the client to create an anonymous mapping of `howmuch' bytes.
 *
 * arg:     the client (struct spclient *)
 * howmuch: requested size in bytes
 * addr:    on success (and on ENOMEM) receives the address reported by
 *          the client; not written if the request itself failed
 *
 * rumpuser__kunlock() is called before talking to the client and
 * rumpuser__klock() afterwards.
 *
 * Returns 0 on success, EFAULT if the request failed or the response
 * carried no data, and ENOMEM if the client reported a NULL address.
 * Checking for a missing payload avoids dereferencing a NULL pointer
 * when the response is empty.
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *resp, *rdata = NULL;
	int nlocks, rv;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = anonmmap_req(spc, howmuch, &rdata);
	if (rv || rdata == NULL) {
		rv = EFAULT;
		goto out;
	}

	/* The payload holds the address of the mapping in the client. */
	resp = *(void **)rdata;
	free(rdata);

	if (resp == NULL)
		rv = ENOMEM;

	*addr = resp;

 out:
	rumpuser__klock(nlocks, NULL);

	return rv;
}
552
/*
 *
 * Startup routines and mainloop for server.
 *
 */
558
559 struct spservarg {
560 int sps_sock;
561 connecthook_fn sps_connhook;
562 };
563
564 static pthread_attr_t pattr_detached;
565 static void
566 handlereq(struct spclient *spc)
567 {
568 struct sysbouncearg *sba;
569 pthread_t pt;
570 int rv;
571
572 /* XXX: check that it's a syscall */
573
574 sba = malloc(sizeof(*sba));
575 if (sba == NULL) {
576 /* panic */
577 abort();
578 }
579
580 sba->sba_spc = spc;
581 sba->sba_hdr = spc->spc_hdr;
582 sba->sba_data = spc->spc_buf;
583
584 spc->spc_buf = NULL;
585 spc->spc_off = 0;
586
587 spcref(spc);
588 if ((rv = pthread_create(&pt, &pattr_detached,
589 serv_syscallbouncer, sba)) != 0) {
590 /* panic */
591 abort();
592 }
593 }
594
595 static void *
596 spserver(void *arg)
597 {
598 struct spservarg *sarg = arg;
599 struct spclient *spc;
600 unsigned idx;
601 int seen;
602 int rv;
603 unsigned int nfds, maxidx;
604
605 for (idx = 0; idx < MAXCLI; idx++) {
606 pfdlist[idx].fd = -1;
607 pfdlist[idx].events = POLLIN;
608
609 spc = &spclist[idx];
610 pthread_mutex_init(&spc->spc_mtx, NULL);
611 pthread_cond_init(&spc->spc_cv, NULL);
612 }
613 pfdlist[0].fd = sarg->sps_sock;
614 pfdlist[0].events = POLLIN;
615 nfds = 1;
616 maxidx = 0;
617
618 pthread_attr_init(&pattr_detached);
619 pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
620 /* XXX: doesn't stacksize currently work on NetBSD */
621 pthread_attr_setstacksize(&pattr_detached, 32*1024);
622
623 DPRINTF(("rump_sp: server mainloop\n"));
624
625 for (;;) {
626 int discoed;
627
628 /* g/c hangarounds (eventually) */
629 discoed = atomic_swap_uint(&disco, 0);
630 while (discoed--) {
631 nfds--;
632 idx = maxidx;
633 while (idx) {
634 if (pfdlist[idx].fd != -1) {
635 maxidx = idx;
636 break;
637 }
638 idx--;
639 }
640 DPRINTF(("rump_sp: set maxidx to [%u]\n",
641 maxidx));
642 }
643
644 DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
645 seen = 0;
646 rv = poll(pfdlist, maxidx+1, INFTIM);
647 assert(maxidx+1 <= MAXCLI);
648 assert(rv != 0);
649 if (rv == -1) {
650 if (errno == EINTR)
651 continue;
652 fprintf(stderr, "rump_spserver: poll returned %d\n",
653 errno);
654 break;
655 }
656
657 for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
658 if ((pfdlist[idx].revents & POLLIN) == 0)
659 continue;
660
661 seen++;
662 DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
663 idx, seen, rv));
664 if (idx > 0) {
665 spc = &spclist[idx];
666 DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
667 switch (readframe(spc)) {
668 case 0:
669 break;
670 case -1:
671 serv_handledisco(idx);
672 break;
673 default:
674 switch (spc->spc_hdr.rsp_class) {
675 case RUMPSP_RESP:
676 kickwaiter(spc);
677 break;
678 case RUMPSP_REQ:
679 handlereq(spc);
680 break;
681 default:
682 printf("PANIC\n");
683 abort();
684 break;
685 }
686 break;
687 }
688
689 } else {
690 DPRINTF(("rump_sp: mainloop new connection\n"));
691
692 idx = serv_handleconn(pfdlist[0].fd,
693 sarg->sps_connhook, nfds == MAXCLI);
694 if (idx)
695 nfds++;
696 if (idx > maxidx)
697 maxidx = idx;
698 DPRINTF(("rump_sp: maxid now %d\n", maxidx));
699 }
700 }
701 }
702
703 return NULL;
704 }
705
706 int
707 rumpuser_sp_init(const struct rumpuser_sp_ops *spopsp, const char *url)
708 {
709 pthread_t pt;
710 struct spservarg *sarg;
711 struct sockaddr *sap;
712 char *p;
713 unsigned idx;
714 int error, s;
715
716 p = strdup(url);
717 if (p == NULL)
718 return ENOMEM;
719 error = parseurl(p, &sap, &idx, 1);
720 free(p);
721 if (error)
722 return error;
723
724 s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
725 if (s == -1)
726 return errno;
727
728 spops = *spopsp;
729 sarg = malloc(sizeof(*sarg));
730 if (sarg == NULL) {
731 close(s);
732 return ENOMEM;
733 }
734
735 sarg->sps_sock = s;
736 sarg->sps_connhook = parsetab[idx].connhook;
737
738 /* sloppy error recovery */
739
740 /*LINTED*/
741 if (bind(s, sap, sap->sa_len) == -1) {
742 fprintf(stderr, "rump_sp: server bind failed\n");
743 return errno;
744 }
745 if (listen(s, MAXCLI) == -1) {
746 fprintf(stderr, "rump_sp: server listen failed\n");
747 return errno;
748 }
749
750 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
751 fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
752 return errno;
753 }
754 pthread_detach(pt);
755
756 return 0;
757 }
758